-rw-r--r--  COPYING  340
-rw-r--r--  ChangeLog  1018
-rw-r--r--  PROGRAM_LICENSE  1
-rw-r--r--  README  51
-rw-r--r--  SOURCES  29
-rw-r--r--  TODO  161
-rw-r--r--  TODO.orig  153
-rwxr-xr-x  ada_count  27
-rwxr-xr-x  append_license  62
-rwxr-xr-x  append_specname  57
-rwxr-xr-x  asm_count  166
-rwxr-xr-x  awk_count  27
-rwxr-xr-x  break_filelist  1308
-rwxr-xr-x  break_filelist.orig  1084
-rw-r--r--  c_count.c  225
-rw-r--r--  c_lines_environment.dat  98
-rw-r--r--  c_outfile.dat  1
-rwxr-xr-x  cobol_count  82
-rwxr-xr-x  compute_all  87
-rwxr-xr-x  compute_c_usc  77
-rwxr-xr-x  compute_java_usc  59
-rwxr-xr-x  compute_sloc_lang  66
-rwxr-xr-x  count_extensions  56
-rwxr-xr-x  count_unknown_ext  32
-rwxr-xr-x  csh_count  27
-rwxr-xr-x  dirmatch  37
-rw-r--r--  driver.c  110
-rw-r--r--  driver.h  50
-rwxr-xr-x  exp_count  27
-rwxr-xr-x  extract-count  83
-rwxr-xr-x  extract_license  178
-rwxr-xr-x  f90_count  81
-rwxr-xr-x  fortran_count  83
-rwxr-xr-x  generic_count  77
-rwxr-xr-x  get_sloc  544
-rwxr-xr-x  get_sloc_details  103
-rwxr-xr-x  haskell_count  122
-rw-r--r--  java_lines_environment.dat  98
-rw-r--r--  jsp_count.c  1787
-rw-r--r--  jsp_count.l  90
-rwxr-xr-x  lex_count  70
-rw-r--r--  lexcount1.c  58
-rwxr-xr-x  linux_unique  64
-rwxr-xr-x  lisp_count  27
-rwxr-xr-x  make_filelists  193
-rw-r--r--  makefile  246
-rw-r--r--  makefile.orig  222
-rwxr-xr-x  makefile_count  27
-rw-r--r--  ml_count.c  209
-rw-r--r--  modula3_count  65
-rwxr-xr-x  objc_count  89
-rw-r--r--  pascal_count.c  1714
-rw-r--r--  pascal_count.l  81
-rwxr-xr-x  perl_count  147
-rw-r--r--  php_count.c  335
-rwxr-xr-x  print_sum  40
-rwxr-xr-x  print_sum_subset  41
-rwxr-xr-x  python_count  120
-rwxr-xr-x  redo_licenses  42
-rwxr-xr-x  rpm_unpacker  71
-rwxr-xr-x  ruby_count  27
-rwxr-xr-x  sed_count  27
-rwxr-xr-x  sh_count  27
-rwxr-xr-x  show_filecount  58
-rwxr-xr-x  simplecount  84
-rwxr-xr-x  sloccount  258
-rw-r--r--  sloccount.1  235
-rw-r--r--  sloccount.1.gz  bin 0 -> 3377 bytes
-rw-r--r--  sloccount.html  2464
-rw-r--r--  sloccount.html.orig  2440
-rw-r--r--  sloccount.spec  56
-rwxr-xr-x  sql_count  76
-rw-r--r--  stripccomments.c  50
-rw-r--r--  stub  22
-rw-r--r--  stubsh  23
-rw-r--r--  table.html  569
-rwxr-xr-x  tcl_count  27
-rw-r--r--  testcode/conditions.CBL  31
-rw-r--r--  testcode/hello.f  10
-rw-r--r--  testcode/hello.f90  7
-rw-r--r--  testcode/hello.pas  9
-rw-r--r--  testcode/hello1.pas  12
-rw-r--r--  testcode/messages.rb  152
-rw-r--r--  testcode/temp.c  5
-rw-r--r--  testcode/test.hs  19
-rw-r--r--  testcode/test1.inc  23
-rw-r--r--  testcode/test1.lhs  15
-rw-r--r--  testcode/test1.php  27
-rw-r--r--  testcode/test2.lhs  44
-rw-r--r--  testcode/wokka.cbl  4
-rw-r--r--  testcode/wokka.cs  8
-rw-r--r--  usc_subset.tar  1
92 files changed, 19435 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..eeb586b
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..6db1fdf
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,1018 @@
+2004-08-01 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.26.
+ * Modified driver.h to clearly state the GPL license.
+ This doesn't change anything, but it makes the
+ Savannah people happy.
+
+2004-07-31 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.25. Changes are:
+ * Per request from Savannah, added the more detailed licensing
+ text to every source file.
+ * Modified the assembly language counting code, based on useful
+ feedback and a test case from Purnendu Ghosh, so that
+ the heuristics work better at guessing the right comment character
+ and they perform well.
+ In particular, the comment character '*' is far better supported.
+ * Added support for Delphi project files (.dpr files, which are
+ essentially in Pascal syntax), thanks to Christian Iversen.
+ * Some versions of Perl are apparently causing trouble, but
+ I have not yet found the solution for them (other than using
+ a different version of Perl). The troublesome line of code
+ in break_filelist currently says:
+ open(FH, "-|", "md5sum", $filename) or return undef;
+ This could be changed to:
+ open(FH, "-|", "md5sum $filename") or return undef;
+ But I dare not fix it that way, because that would create
+ a security problem. Imagine downloading someone
+ else's source code (who you don't know), using sloccount, and
+ that other person has created in their source tree a file
+ named like this: "; rm -fr /*" or its variations.
+ I'd rather have the program fail in specific circumstances
+ (users will know when it won't work!) than to insert a known
+ dangerous security vulnerability. I can't reproduce this problem;
+ it's my hope that those who CAN will help me find a good
+ solution. For the moment, I'm documenting the problem here and
+ in the TODO list, so that people will realize WHY it hasn't
+ just been "fixed" with the "obvious solution".
+ The answer: I care about security.
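+      (A possible direction, sketched here for illustration only - this is
+      not code that ships with sloccount: the core Digest::MD5 module can
+      compute the checksum in-process, which avoids invoking an external
+      md5sum at all and so sidesteps the quoting question entirely. The
+      helper name get_md5 is made up.)
+          # Illustrative sketch only; not part of sloccount.
+          use Digest::MD5;
+          sub get_md5 {
+              my ($filename) = @_;
+              open(my $fh, '<', $filename) or return undef;
+              binmode($fh);   # checksum the raw bytes
+              my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
+              close($fh);
+              return $digest;
+          }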
+
+2004-05-10 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.24 - a few minor bugfixes and improvements.
+ Automatically tries to use several different MD5 programs, until
+ it finds one that works - this is more flexible, and as a result,
+ it now works out-of-the-box on Apple Mac OS X.
+ SLOCCount now accepts "." as the directory to analyze,
+ it correctly identifies wrapper scripts left by libtool as
+ automatically generated code, and correctly identifies debian/rules
+ files as makefiles. Also, installation documentation has improved.
+ My thanks to Jesus M. Gonzalez-Barahona for telling me about the
+ Debian bug reports and testing of candidate versions.
+ My thanks to Koryn Grant, who told me what needed to be done
+ to get SLOCCount running on Mac OS X (and for testing my change).
+ This version resolves Debian Bug reports #173699,
+ #159609, and #200348.
+
+2004-04-27 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Automatically try several different MD5 programs, looking for
+ a working one. Originally this program REQUIRED md5sum.
+ This new version tried md5sum, then md5, then openssl.
+ The good news - the program should now 'just work' on
+ Apple Mac OS X. The bad news - if md5sum doesn't exist,
+ sloccount still has a good chance of working, but will display
+ odd error messages while it searches for a working MD5 program.
+ There doesn't seem to be an easy way in perl to suppress such
+ messages while still permitting "trouble reading data file"
+ messages. However, doing the test at run-time is much more
+ robust, and this way it at least has a chance of working on
+ systems it didn't work on at all before.
+ * Removed the "debian" subdirectory. There was no need for it;
+ it's best for the Debian package maintainers to control that
+ information on their own.
+
+2004-04-25 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Allow "." and ".." as specifications for directories even
+ when they have no subdirectories.
+ This resolves Debian bug report log #200348
+ ("Sloccount . fails").
+ * Correctly identify wrapper scripts left by libtool as
+ automatically generated code.
+ When linking against a libtool library, libtool leaves a wrapper
+ script in the source tree (so that the binary can be executed
+ in-place, without installing it), which includes this:
+ (line) # foo - temporary wrapper script for .libs/foo
+ (line) # Generated by ltmain.sh - GNU libtool 1.4.3
+ (1.922.2.111 2002/10/23 02:54:36)
+ I fixed this by saying that any comment beginning with
+ "Generated by" in the first few lines must be auto-generated
+ code. This should correctly catch other auto-generated code too.
+ There is a risk that code NOT automatically generated will be
+ incorrectly labelled, but that's unlikely.
+ This resolves Debian Bug report logs #173699,
+ "sloccount should ignore libtool-generated wrapper scripts".
+ * Now identifies "debian/rules" files as a makefile.
+ This resolves Debian Bug report logs - #159609,
+ "sloccount Does not consider debian/rules to be a makefile".
+ * Minor fix to sloccount makefile, so that man page installs
+ correctly in some situations that didn't before.
+ My thanks to Jesus M. Gonzalez-Barahona.
+
+2003-11-01 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Version 2.23 - a few minor bugfixes and improvements.
+
+2003-11-01 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Fixed incorrect UTF-8 warnings. Perl 5.8.0 creates warnings
+ when the LANG value includes ".UTF-8" but the text files read
+ aren't UTF-8. This causes problems on Red Hat Linux 9 and others,
+ which set LANG to include ".UTF-8" by default.
+ This version quietly removes ".UTF-8" from the LANG value for
+ purposes of sloccount, to eliminate the problem.
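+      (Roughly along these lines - an illustrative one-liner, not the
+      exact code:)
+          # Strip ".UTF-8" from LANG before running the counters.
+          $ENV{'LANG'} =~ s/\.UTF-8//i if defined($ENV{'LANG'});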
+
+2003-11-01 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Fixed bad link to "options" in sloccount.html; my thanks to
+ Barak Zalstein (<Barak.Zalstein, at, ParthusCeva.com) for
+ telling me.
+ * Added "--version" option that prints the version number.
+ Thanks to Auke Jilderda (auke.jilderda, at, philips.com)
+ for suggesting this.
+
+2003-11-01 Sam Tregar <sam, at, tregar.com>
+ * Fixed a bug in perl_count that prevents it from
+ properly skipping POD.
+
+2003-10-30 Julian Squires <julian, at, greyfirst.ca>
+ * Added simple literate Haskell support.
+ * Added test cases for literate Haskell support.
+ * Updated Common LISP and Modula 3 extensions.
+
+2003-03-08 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Version 2.22 - improved OCAML support, thanks to Michal Moskal.
+ Other minor improvements.
+
+2003-02-15 Jay A. St. Pierre
+ * Fixed uninstalling documents to always remove DOC_DIR.
+
+2003-02-15 Michal Moskal
+ * Significantly improved OCAML support - complete rewrite of
+ ML handling.
+
+2003-01-28 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Version 2.21 - improved Fortran support (inc. Fortran 90);
+ my thanks to Erik Schnetter for implementing this!
+
+2002-12-17 Erik Schnetter <schnetter, at, uni-tuebingen.de>
+ * Added support for Fortran 90. Extensions are ".f90" and ".F90".
+ * Changed handling of Fortran 77 to include HPF and Open MP
+ statements, and to accept uppercase ".F77" as extension.
+
+2002-12-04 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Version 2.20 - minor portability and documentation improvements.
+ * Documentation improvements - more discussion on Intermediate COCOMO.
+
+2002-12-04 Linh Luong <Linh.Luong, at, trw.com>
+ * Modified SLOCCount so that it would run on Solaris 2.7
+ (once Perl is installed and the PATH is set correctly to include
+ the directory where SLOCCount is installed).
+ This required modifying the file sloccount to eliminate the
+ test ("[") option "-e", replacing it with the "-r" option
+ ("test -e" is apparently not supported by Solaris 2.7).
+ Since "-r" should be available on any implementation of "test",
+ this is a nice portable change.
+
+2002-11-16 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Version 2.19, documentation improvement.
+ * Documented the "Improved COCOMO" model from Boehm,
+ so that users who want more accurate estimates can do at
+ least a little bit straight from the documentation.
+ For more, as always, see Boehm's book.
+ If anyone wants to implement logical SLOC counting, please be
+ my guest! Then, COCOMO II could be implemented too.
+ * Modified this ChangeLog to document more fully the SGI MIPS problem.
+
+2002-11-16 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Version 2.18, minor bugfix release.
+ * Updated the "wc -l" check; it would cause problems for users
+ who had never used sloccount before (because datadir had not
+ been created yet). Also, the "wc -l" check itself would not
+ reliably identify SGI systems that had horribly buggy "wc"
+ programs; it's believed this is a better check.
+ Thanks to Randal P. Andress for helping with this.
+ * Fixed this ChangeLog. It was Randal P. Andress who identified
+ the "wc -l" bug, not Bob Brown. Sorry for the misattribution,
+ and thanks for the bugfixing help!
+ * Changed rpm building command to work with rpm version 4
+ (as shipped with Red Hat Linux 8.0). As of Red Hat Linux 8,
+ the "rpm" command only loads files, while there is now a
+ separate "rpmbuild" command for creating rpm files.
+ Those rebuilding with Red Hat Linux 7.X or less (rpm < version 4)
+ will need to edit the makefile slightly, as documented
+ in the makefile, to modify the variable RPMBUILD.
+ * "make rpm" now automatically uninstalls sloccount first if it can,
+ to eliminate unnecessary errors when building new versions of
+ sloccount RPMs. This only affects people modifying and
+ redistributing the code of sloccount (mainly, me).
+
+2002-11-16 Randal P. Andress
+ * Fixed get_sloc so that it
+ also accepts --filecounts as well as --filecount.
+
+2002-11-05 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.17, which adds support for Java Server Pages
+ (.jsp), eliminates some warnings in newer Perl implementations,
+ and has a few minor fixes/improvements.
+
+2002-11-18 Randal P. Andress
+ * Randal provided the following additional information about this
+ really nasty problem on SGI MIPS machines. It causes gcc
+ to not work properly, and thus "wc" won't work properly either.
+ SLOCCount now detects that there's a problem and will refuse to
+ run if things are screwed up this badly. For those unfortunate
+ few who have to deal with this case, here's additional information
+ from Randal Andress:
+
+ When gcc is installed on SGI MIPS from source, sgi-mips-sgi-irix6.x,
+ an option specification in the 'specs' file is set
+ incorrectly for n32. The offending line is:
+ %{!mno-long64:-D__LONG_MAX__=9223372036854775807LL}
+ Which (unless option '-mno-long64' is specified), means that
+ LONG_MAX is 64 bits. The trouble is two fold:
+ 1. This should not be the default, since for n32,
+ normally, long is only 32 bits. and
+ 2. The option did not carry into the
+ compiler past the pre-processor - so it did not work.
+ The simplest fix for gcc (it seems that it can be done locally by
+ editing the specs file) is to have the following line to
+ replace the offending line in the specs file:
+ %{long64:-D__LONG_MAX__=9223372036854775807LL}
+ This makes the default 32 and only sets it to 64 if you specify
+ '-long64' which *does* work all the way through the compiler.
+
+ I had the binary for gcc 3 on the sgi freeware site installed here and
+ looked at its specs file and found no problem (they have the '-long64'
+ option). So it seems that when they build gcc for their freeware
+ distribution, they fix it.
+
+ The problem comes when someone downloads and builds gcc for themselves
+ on sgi. Then the installation is faulty and any n32 code that they
+ build is subject to this flaw if the source makes use of LONG_MAX
+ or any of the values derived from it.
+
+ The real problem turned out to be quite general for sgi n32 gcc. The
+ 'specs' file and mips.h are not consistent resulting in 'LONG_MAX'
+ being given an incorrect value.
+
+ The following 'c' program shows inconsistent values for macros for
+ mips-irix n32:
+ __LONG_MAX__ (LONG_MAX) and
+ _MIPS_SZLONG
+ This seems to stem from an improper default option in the specs file
+ forcing -D__LONG_MAX__=0x7fffffffffffffff
+ to be passed to each compile.
+
+ Here is the test case, compile command, and output:
+
+ # include <limits.h>
+ #define LONG_MAX_32_BITS 2147483647
+ #include <sys/types.h>
+ int main () {
+ #if LONG_MAX <= LONG_MAX_32_BITS
+ printf ("LONG_MAX <= LONG_MAX_32_BITS = 0x%lx\n",LONG_MAX);
+ #else
+ printf ("LONG_MAX > LONG_MAX_32_BITS = 0x%llx\n",LONG_MAX);
+ #endif
+
+ printf ("_MIPS_SZLONG = 0x%x\n",_MIPS_SZLONG);
+ printf ("__LONG_MAX__ = 0x%llx (size:%d)\n",__LONG_MAX__,
+ sizeof
+ (__LONG_MAX__));
+
+ #if LONG_MAX <= LONG_MAX_32_BITS
+ printf ("LONG_MAX = 0x%lx (size:%d)
+ \n",LONG_MAX,sizeof(LONG_MAX));
+ #else
+ printf ("LONG_MAX = 0x%llx (size:%d)
+ \n",LONG_MAX,sizeof(LONG_MAX));
+ #endif
+
+ printf ("LONG_MAX_32_BITS = 0x%x (size:%d)
+ \n",LONG_MAX_32_BITS,sizeof(LONG_MAX_32_BITS));
+ return 0;
+ }
+ ============ end test case source.
+
+ >gcc -n32 -v -o test_limits -O0 -v -g test_limits.c
+
+ defines include:....-D__LONG_MAX__=9223372036854775807LL....
+
+ =========== test output:
+ >test_limits
+ LONG_MAX > LONG_MAX_32_BITS = 0x7fffffffffffffff
+ _MIPS_SZLONG = 0x20
+ __LONG_MAX__ = 0x7fffffffffffffff (size:8)
+ LONG_MAX = 0x7fffffffffffffff (size:8)
+ LONG_MAX_32_BITS = 0x7fffffff (size:4)
+
+
+ ======== end test case output
+
+ By changing the specs entry:
+ %{!mno-long64:-D__LONG_MAX__=9223372036854775807LL}
+ to
+ %{long64:-D__LONG_MAX__=9223372036854775807LL}
+ as is discussed in one of the internet reports I sent earlier, the
+ output,
+ after recompiling and running is:
+
+ LONG_MAX <= LONG_MAX_32_BITS = 0x7fffffff
+ _MIPS_SZLONG = 0x20
+ __LONG_MAX__ = 0x7fffffff (size:4)
+ LONG_MAX = 0x7fffffff (size:4)
+ LONG_MAX_32_BITS = 0x7fffffff (size:4)
+
+ Although I have not studied it well enough to know exactly why, the
+ problem has to do with the size of (long int) and the attempt of the
+ 'memchr' code to determine whether or not it can use 64 bit words
+ rather than 32 bit words in chunking through the string looking
+ for the specified character, "\n"(0x0a) in the case of 'wc'.
+
+2002-11-03 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Fixed makefile install/uninstall scripts to properly handle
+ documentation.
+ * Added simple check at beginning of sloccount's execution
+ to make sure "wc -l" actually works.
+ Randal P. Andress has found that on certain SGI machines, "wc -l"
+ produces the wrong answers. He reports,
+ "You may already know this, but just in case you do not, there is an
+ apparent bug in textutils-1.19 function 'wc' (at least as built on
+ SGI-n32) which is caused by an apparent bug in memchr (*s, c, n).
+ The bug is only evident when counting 'lines only' or
+ 'lines and characters' (i.e., when NOT counting words).
+ The result is that the filecount is short...
+ I replaced the memchr with very simple code and it corrected the
+ problem. I then installed textutils-2.1 which does not seem have
+ the problem."
+ I thought about adding this information just to the documentation,
+ but no one would notice it. By adding a check to the code,
+ most people will neither know nor care about the problem, and
+ the few people it DOES affect will know about the problem
+ right away (instead of reporting wrong answers).
+ Yes, a failing "wc -l" is a pretty horrific bug, but rather
+ than ignore the problem, it's better to detect and address it.
+ * Modified documentation everywhere so that it consistently
+ documents "--filecount" as the correct option for filecounts,
+ not "--filecounts". That way, the documentation is consistent.
+ * However, in an effort to "do the right thing", the program sloccount
+ will accept "--filecounts" as an alternative way to specify
+ --filecount.
+
+2002-11-02 Bob Brown <rlb, at, bluemartini.com>
+ * Contributed code changes to count Java Server Page (.jsp) files.
+ The code does not pull comments out of embedded
+ javascript. We don't consider that a serious limitation at all,
+ since no one should be sending embedded javascript comments
+ to client browsers anyhow. They're extremely rare.
+ David A. Wheeler notes that you could
+ argue that if you _DO_ include such comments, they're
+ not really functioning as comments (since they DO have an
+ effect on the result - they're more like print statements in an
+ older language instead of a traditional language's comments).
+
+2002-11-02 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Eliminated more Perl warnings by adding more
+ defined() wrappers to while() loops in Perl code
+ (based on Randal's suggestion). The problem is that Perl
+ handles the last line of a file oddly if it doesn't end with
+ a newline indicator, and it consists solely of "0".
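+      (Illustrative sketch of the idiom; the filehandle name is made up:)
+          # Without defined(), a final line consisting only of "0" with no
+          # trailing newline evaluates as false and ends the loop early.
+          while (defined(my $line = <FILE>)) {
+              chomp($line);
+              # ... examine/count the line as before ...
+          }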
+
+2002-11-02 Randal P Andress <Randal_P_Andress, at, raytheon.com>
+ * Eliminated some Perl warnings by adding
+ defined() wrappers to while() loops in Perl code.
+
+2002-8-24 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.16, fixed limitations of old Pascal counter.
+
+2002-8-24 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Re-implemented Pascal counter (in flex). This fixes some problems
+ the old counter had - it handles nested comments with different
+ formats, and strings as well.
+ * Removed the BUGS information that described the Pascal counter
+ weaknesses.. since now they're gone!
+ * Added an additional detector of automatically generated files -
+ it's an auto-generated file if it starts with
+ "A lexical scanner generated by flex", since flex adds this.
+ Generally, this isn't a problem, since we already detect
+ the filename and matching .c files, but it seems worth doing.
+
+2002-8-22 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.15, a bugfix + small feature improvement.
+ My sincere thanks to Jesus M. Gonzalez-Barahona, who provided
+ patches with lots of useful improvements.
+
+2002-8-22 Jesus M. Gonzalez-Barahona
+ * Added support for Standard ML (as language "ml").
+ * Per a patch suggested to the Debian BTS, ".hh" is now also
+ recognized as a C++ extension.
+ * Some ".inc" files are actually Pascal, not PHP;
+ now ".inc" files are examined binned to either Pascal or PHP
+ depending on their content.
+ * Improved detection of Pascal files (particularly for Debian
+ package fpc-1.0.4).
+ * php_count was not closing open files before opening a new one,
+ and therefore sloccount could fail to count PHP code given
+ a VERY LONG list of PHP files in one package.
+ * break_filelist had problems with files including <CR> and other
+ weird characters at the end of the filename. Now fixed.
+
+2002-7-24 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.14. Improved Pascal detection, improved
+ Pascal counting, added a reference to CCCC.
+
+2002-7-24 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Modified Pascal counting; the older (*..*) commenting structure
+ is now supported. Note that the Pascal counter is still imperfect;
+ it doesn't handle the prioritization between these two commenting
+ systems, and can be fooled by strings that include a
+ comment start indicator. Rewrites are welcome; however, for most
+ people the current code is sufficient. This really needs to be
+ rewritten in flex; languages with strings and multiline comment
+ structures aren't handled correctly with naive Perl code.
+ * Documented the weaknesses in the Pascal counter as BUGS.
+
+2002-7-24 Ian West IWest, at, aethersystems, dot com
+ * Improved heuristic for detecting Pascal programs in break_filelist.
+ Sloccount will now categorize files as Pascal if they have
+ the file type ".pas" as well as ".p", though it still checks
+ the contents to make sure it's really pascal.
+ The heuristic was modified so that it's also considered Pascal
+ if it contains "module" and "end.",
+ or "program", "begin", and "end." in addition to the existing cases.
+
+ (Ian West used sloccount to analyze a system containing
+ about 1.2 million lines of code in almost 10,000 files;
+ ninety percent of it is Ada, and the bulk of the remainder
+ is split between Pascal and SQL. The following is Ian's
+ more detailed explanation for the change):
+
+ VAX Pascal uses "module" instead of "program" for files that
+ have no program block and therefore no "begin".
+ There is also no requirement for a Pascal file to have
+ procedures or functions, which is the case for files that are
+ equivalents of C headers. So I modified the function to
+ allow files to be accepted that only contain either:
+ "module" and "end."; or "program", "begin", and "end.".
+ I considered adding checks for "const", "type", and "var" but
+ decided they were not necessary. I have added the extra cases
+ without changing the existing logic so as not to upset
+ any cases for "unit". It is possible to optimize the logic
+ somewhat, but I felt clarity was better than efficiency.
+
+ I found that some of my Pascal files were getting through
+ only because the word "unit" appeared in certain comments.
+ So I moved the line for filtering out comments above the lines
+ that look for the keywords.
+
+ Pascal in general allows comments in the form (*...*) as well
+ as {...}, so I added a line to remove these.
+
+ After making these changes, all my files were correctly
+ categorized. I also verified that the sample Pascal files
+ from p2c still had the same counts.
+
+ Thank you for developing SLOCCount. It is a very useful tool.
+
+2002-7-15 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added a reference to CCCC; http://cccc.sourceforge.net/
+
+2002-5-31 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.13.
+ * Code cleanups. Turned on gcc warnings ("-Wall" option) and
+ cleaned up all code that set off a warning.
+ This should make the code more portable as well as cleaner.
+ Made a minor speed optimization on an error branch.
+
+2002-3-30 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.12.
+ * Added a "testcode" directory with some sample source code
+ files for testing. It's small now, but growth is expected.
+ Contributions for this test directory (especially for
+ edge/oddball cases) are welcome.
+
+2002-3-25 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Changed first-line recognizers so that the first line (#!) will
+ be matched ignoring case. For most Unix/Linux systems uppercase
+ script statements won't work, but Windows users may create them.
+ * Now recognize SpeedyCGI, a persistent CGI interface for Perl.
+ SpeedyCGI has most of the speed advantages of FastCGI, but
+ has the security advantages of CGI and has the CGI interface
+ (from the application writer's point of view).
+ SpeedyCGI perl scripts have #!/usr/bin/speedy lines instead of
+ #!/usr/bin/perl. More information about SpeedyCGI
+ can be found at http://daemoninc.com/speedycgi/
+ Thanks to Priyadi Iman Nurcahyo for noticing this.
+
+2002-3-15 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added filter to remove calls to sudo, so
+ "#!/usr/bin/sudo /usr/bin/python" etc as the first line
+ are correctly identified.
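+      (A rough sketch combining the first-line handling described in the
+      two entries above - illustrative only, not the actual break_filelist
+      code:)
+          my $firstline = <FH>;
+          if (defined($firstline) && $firstline =~ m/^#!/) {
+              $firstline =~ s/\/usr\/bin\/sudo\s+//;      # drop a sudo wrapper
+              if ($firstline =~ m/\b(perl|speedy)\b/i) {  # SpeedyCGI is Perl
+                  # treat the file as Perl source
+              } elsif ($firstline =~ m/\bpython\b/i) {
+                  # treat the file as Python source
+              }
+          }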
+
+2002-3-7 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added cross-references to LOCC and CodeCount. They don't
+ do what I want.. which is why I wrote my own! .. but others
+ may find them useful.
+
+2002-2-28 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.11.
+ * Added support for C#. Any ".cs" file is presumed
+ to be a C# file. The C SLOC counter is used to count SLOC.
+ Note that C# doesn't have a "header" type (Java doesn't either),
+ so disambiguating headers isn't needed.
+ * Added support for regular Haskell source files (.hs).
+ Their syntax is sufficiently similar that just the regular
+ C SLOC counter works.
+ Note that literate Haskell files (.lhs) are _not_ supported,
+ so be sure to process .lhs files into .hs files before counting.
+ There are two different .lhs conventions; for more info, see:
+ http://www.haskell.org/onlinereport/literate.html
+ * Tweaked COBOL counter slightly. Added support in fixed (default)
+ format for "*" and "/" as comment markers in column 1.
+ * Modified list of file extensions known not to be source code,
+ based on suffixes(7). This speeds things very slightly, but the
+ main goal is to make the "unknown" list smaller.
+ That way, it's much easier to see if many source code files
+ were incorrectly ignored. In particular, compressed formats
+ (e.g., ".tgz") and multimedia formats (".wav") were added.
+ * Modified documentation to make things clear: If you want source
+ in a compressed file to be counted (e.g. .zip, .tar, .tgz),
+ you need to uncompress the file first!!
+ * Modified documentation to clarify that literate programming
+ files must be expanded first.
+ * Now recognize ".ph" as Perl (it's "Perl header" code).
+ Please let me know if this creates many false positives
+ (i.e., if there are programs using ".ph" in other ways).
+ * File count_unknown_ext modified slightly so that it now examines
+ ~/.slocdata. Modified documentation so that its use is
+ recommended and explained. It's been there for a while, but
+ with poor documentation I bet few understand its value.
+ * Modified output to clearly say that it's Open Source Software /
+ Free Software, licensed under the GPL. It was already stated
+ that way in the documentation and code, but clearly stating this
+ on every run makes it even harder to miss.
+
+2002-2-27 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.10.
+ * COBOL support added! Now ".cbl" and ".cob" are recognized
+ as COBOL extensions, as well as their uppercase ".CBL" and ".COB".
+ The COBOL counter works as follows:
+ it detects if a "freeform" command has been given. Unless a
+ freeform command's given, a comment has "*" or "/" in column 7,
+ and a SLOC is a non-comment line with
+ at least one non-whitespace in column 8 or later (including
+ columns 72 or greater; it's arguable if a line that's empty
+ before column 72 is really a line or a comment, but I've decided
+ to count such odd things as lines).
+ If we've gone free-format, a comment is a line that has optional
+ whitespace and then "*".. otherwise, a line with nonwhitespace
+ is a SLOC.
+ Is this good enough? I think so, but I'm not a major COBOL user.
+ Feedback from real COBOL users would be welcome.
+ A source for COBOL test programs is:
+ http://www.csis.ul.ie/cobol/examples/default.htm
+ Information on COBOL syntax gathered from various locations, inc.:
+ http://cs.hofstra.edu/~vmaffea1/cobol.html
+ http://support.merant.com/websupport/docs/microfocus/books/
+ nx31books/lrintr.htm
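+      (A compressed sketch of the fixed-format rule just described -
+      illustrative only; the real cobol_count also handles the freeform
+      directive and a file list:)
+          my $sloc = 0;
+          while (<>) {
+              chomp;
+              my $ind = length($_) >= 7 ? substr($_, 6, 1) : ' ';  # column 7
+              next if ($ind eq '*' || $ind eq '/');          # comment line
+              my $body = length($_) >= 8 ? substr($_, 7) : '';  # columns 8+
+              $sloc++ if ($body =~ /\S/);   # any non-blank content is a SLOC
+          }
+          print "$sloc\n";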
+ * Modified handling of uppercase filename extensions so they'll
+ be recognized as well as the more typical lowercase extensions.
+ If a file has one or more uppercase letters - and NO
+ lowercase letters - it's assumed that it may be a refugee from
+ an old OS that supported only uppercase filenames.
+ In that circumstance, if the filename extension doesn't match the
+ set of known extensions, it's made into lowercase and recompared
+ against the set of extensions for source code files.
+ This heuristic should improve recognition of source
+ file types for "old" programs using upper-case-only characters.
+ I do have concern that this may be "too greedy" an algorithm, i.e.,
+ it might claim that some files that aren't really source code
+ are now source code. I don't think it will be a problem, though;
+ in most circumstances few people create filename
+ extensions that differ only by case; the
+ ".c" vs. ".C" thing is an exception, and since Windows folds
+ case it's not a very portable practice. This is a pretty
+ conservative heuristic; I found Cobol programs with lowercase
+ filenames and uppercase extensions ("x.CBL"), which wouldn't
+ be matched by this heuristic. For Cobol and Fortran I put in
+ special ".F", ".CBL", and ".COB" patterns to catch them.
+ With those two actions, the program should manage to
+ correctly identify more source files without incorrectly
+ matching non-source files.
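+      (Sketch of the heuristic - illustrative only; %known_extensions
+      stands in for the real table of recognized source extensions:)
+          if (($filename =~ /[A-Z]/) && ($filename !~ /[a-z]/)) {
+              # Upper-case-only name: maybe from an old uppercase-only OS.
+              my ($ext) = $filename =~ /\.([^.\/]+)$/;
+              if (defined($ext) && !$known_extensions{$ext}) {
+                  my $lc = lc($ext);
+                  # Retry the lookup with a lowercased extension.
+                  $ext = $lc if $known_extensions{$lc};
+              }
+          }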
+ * ".f77" is now also accepted as a Fortran77 extension.
+ Thanks to http://www.webopedia.com/quick_ref/fileextensionsfull.html
+ which has lots of extension information.
+ * Fixed a bug in handling top-level directories where there were NO
+ source files at all; in certain cases this would create
+ spurious error messages. (Fix in compute_all).
+
+2002-1-7 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.09.
+
+2002-1-9 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added support for the Ruby programming language, thanks to
+ patches from Josef Spillner.
+ * Documentation change: added more discussion about COCOMO,
+ in particular why its cost estimates appeared so large.
+ Some programmers think of just the coding part, and only what
+ they'd get paid directly.. but that's less than 10% of the
+ costs.
+
+2002-1-7 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Minor documentation fix - the example for --effort in
+ sloccount.html wasn't quite right (the base documentation
+ for --effort was right, it was just the example that was wrong).
+ My thanks to Kevin the Blue for pointing this out.
+
+2002-1-3 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.08.
+
+2002-1-3 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Based on suggestions by Greg Sjaardema <gdsjaar@sandia.gov>:
+ * Modified c_count.c, function count_file to close the stream
+ after the file is analyzed. Otherwise, this can cause problems
+ with too many open files on some systems, particularly on
+ operating systems with small limits (e.g., Solaris).
+ * Added '.F' as a Fortran extension.
+
+2002-1-2 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 2.07.
+
+2002-1-2 Vaclav Slavik <vaclav.slavik@matfyz.cz>
+ * Modified the RPM .spec file in the following ways:
+ * By default the RPM package now installs into /usr (so binaries
+ go into /usr/bin). Note that those who use the makefile directly
+ ("make install"), including tarball users,
+ will still default to /usr/local instead.
+ You can still make the RPM install to /usr/local by using
+ the prefix option, e.g.:
+ rpm -Uvh --prefix=/usr/local sloccount*.rpm
+ * Made it use %{_prefix} variable, i.e. changing it to install
+ in /usr/local or /usr is a matter of changing one line
+ * Use wildcards in %files section, so that you don't have to modify
+ the specfile when you add a new executable
+ * Mods to make it possible to build the RPM as non-root (i.e.
+ BuildRoot support, %defattr in %files, PREFIX passed to make install)
+
+2002-1-2 Jesus M. Gonzalez Barahona <jgb@debian.org>
+ * Added support for Modula-3 (.m3, .i3).
+ * ".sc" files are counted as Lisp.
+ * Modified sloccount to handle EVEN LARGER systems (i.e.,
+ so sloccount will scale even more).
+ In a few cases, parameters were passed on the command line
+ and large systems could be so large that the command line was
+ too long. E.G., Debian GNU/Linux. This caused a large number
+ of changes to different files to remove these scalability
+ limitations.
+ * All *_count programs now accept "-f filename" and "-f -" options,
+ where 'filename' is a file with a list of filenames to count.
+ Internally the "-f" option with a filename is always used, so
+ that an arbitrarily long list of files can be measured and so
+ that "ps" will show more status information.
+ * compute_sloc_lang modified accordingly.
+ * get_sloc now has a "--stdin" option.
+ * Some small fixes here and there.
+ * This closes Debian bug #126503.
+
+2001-12-28 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released sloccount 2.06.
+
+2001-12-27 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Fixed a minor bug in break_filelist, which caused
+ (in extremely unusual circumstances) a problem when
+ disambiguating C from C++ files in complicated situations
+ where this difference was hard to tell. The symptom: When
+ analyzing some packages (for instance, afterstep-1.6.10 as
+ packaged in Debian 2.2) you would get the following error:
+ Use of uninitialized value in pattern match (m//) at
+ /usr/bin/break_filelist line 962.
+ This could only happen after many other disambiguating rules
+ failed to determine if a file was C or C++ code, so the problem
+ was quite rare.
+ My thanks to Jesus M. Gonzalez-Barahona (in
+ Mostoles, Spain) for the patch that fixes this problem.
+ * Modified man page, explaining the problems of filenames with
+ newlines, and also noting the problems with directories
+ beginning with "-" (they might be confused as options).
+ * Minor improvements to Changelog text, so that the
+ changes over time were documented more clearly.
+ * Note that CEPIS "Upgrade" includes a paper that depends
+ on sloccount. This is "Counting Potatoes: the Size of Debian 2.2"
+ which counts the size of Debian 2.2 (instead of Red Hat Linux,
+ which is what I counted). The original release is at:
+ <http://www.upgrade-cepis.org/issues/2001/6/upgrade-vII-6.html>.
+ I understand that they'll make some tweaks and
+ release a revision of the paper on the Debian website.
+ It's interesting; Debian 2.2 (released in 2000, and
+ which did NOT have KDE), has 56 million physical SLOC and
+ would have cost $1.8 billion USD to develop traditionally.
+ That's more than Red Hat; see <http://www.dwheeler.com/sloc>.
+ Top languages: C (71.12%), C++ (9.79%), LISP, Shell, Perl,
+ Fortran, Tcl, Objective-C, Assembler, Ada, and Python in that
+ order. My thanks to the authors!
+
+2001-10-25 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released sloccount 2.05.
+ * Added support for detecting and counting PHP code.
+ This was slightly tricky, because PHP's syntax has a few "gotchas"
+ like "here document" strings, closing working even in C++ or sh
+ style comments, and so on.
+ Note - HTML files (.html, .htm, etc) are not examined for PHP code.
+ You really shouldn't put a lot of PHP code in HTML documents, because
+ it's a maintenance problem later anyway.
+ The tool assigns every file a single type.. which is a problem,
+ because HTML files could have multiple simultaneous embedded types
+ (PHP, javascript, and HTML text). If the tool was modified to
+ assign multiple languages to a single file, I'm not sure how
+ to handle the file counts (counts of files for each language).
+ For the moment, I just assign HTML to "html".
+ * Modified output so that it adds a header before the language list.
+
+2001-10-23 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released sloccount 2.01 - a minor modification to support
+ Cygwin users.
+ * Modified compute_all to make it more portable (== became =);
+ in particular this should help users using Cygwin.
+ * Modified documentation to note that, if you install Cygwin,
+ you HAVE to use Unix newlines (not DOS newlines) for the Cygwin
+ install. Thanks to Mark Ericson for the bug report & for helping
+ me track that down.
+ * Minor cleanups to the ChangeLog.
+
+2001-08-26 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released sloccount 2.0 - it's getting a new version number because
+ its internal data format changed. You'll have to re-analyze
+ your system for the new sloccount to work.
+ * Improved the heuristics to identify files (esp. .h files)
+ as C, C++, or objective-C. The code now recognizes
+ ".H" (as well as ".h") as header files.
+ The code realizes that ".cpp" files that begin with .\"
+ or ,\" aren't really C++ files - XFree86 stores many
+ man pages with these extensions (ugh).
+ * Added the ability to "--append" analyses.
+ This means that you can analyze some projects, and then
+ repeatedly add new projects. sloccount even stores and
+ recovers md5 checksums, so it even detects duplicates
+ across the projects (the "first" project gets the duplicate).
+ * Added the ability to mark a data directory so that it's not
+ erased (just create a file named "sloc_noerase" in the
+ data directory). From then on, sloccount won't erase it until
+ you remove the file.
+ * Many changes made aren't user-visible.
+ Completely re-organized break_filelist, which was getting
+ incredibly baroque. I've improved the sloccount code
+ so that adding new languages is much simpler; before, it
+ required a number of changes in different places, which was bad.
+ * SLOCCount now creates far fewer files, which is important for
+ analyzing big systems (I was starting to run out of inodes when
+ analyzing entire GNU/Linux distributions).
+ Previous versions created stub files in every child directory
+ for every possible language, even those that weren't used;
+ since most projects only use a few languages, this was costly in
+ terms of inodes. Also, the totals for each language for a given
+ child directory are now in a single file (all-physical.sloc)
+ instead of being in separate files; this not only reduces inode
+ counts, but it also greatly simplifies later processing & eliminated
+ a bug (now, to process all physical SLOC counts in a given child
+ directory, just process that one file).
+
+2001-06-22 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Per Prabhu Ramachandran's suggestion, recognize ".H" files as
+ ".h"/".hpp" files (note the upper case).
+
+2001-06-20 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 1.9. This eliminates installation errors
+ with "sql_count" and "makefile_count",
+ detects PostgreSQL embedded C (in addition to Oracle and Informix),
+ improves detection of Pascal code, and includes support for
+ analyzing licenses (if a directory has the file PROGRAM_LICENSE,
+ the file's contents are assumed to have the license name for that
+ top-level program). It eliminates a portability problem, so
+ hopefully it'll be easier to run it on Unix-like systems.
+ It _still_ requires the "md5sum" program to run.
+
+2001-06-14 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Changed the logic in make_filelists.
+	  This version doesn't require a "-L" option to "test", which GNU
+	  programs supported but others (e.g., Solaris) didn't.
+ It still doesn't normally follow symlinks.
+ Not following subordinate symlinks is important for
+ handling oddities such as pine's build directory
+ /usr/src/redhat/BUILD/pine4.33/ldap in Red Hat 7.1, which
+ includes symlinks to directories not actually inside the
+ package at all (/usr/include and /usr/lib).
+ * Added display of licenses in the summary form, if license
+ information is available.
+ * Added undocumented programs rpm_unpacker and extract_license.
+	  These are not installed at this time; they're just provided as
+ a useful starting point if someone wants them.
+
+2001-06-12 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added support for license counting. If the top directory
+ of a program has a file named "PROGRAM_LICENSE", it's copied to
+ the .slocdata entry, and it's reported as part of a licensing total.
+	  Note that the file LICENSE is ignored; it's often more complex.
+
+2001-06-08 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Fixed RPM spec file - it accidentally didn't install
+	  makefile_count and sql_count.  This produced spurious errors
+	  and inhibited the option of counting makefiles and SQL.
+ Also fixed the makefile to include sql_count in the executable list.
+
+2001-05-16 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added support for auto-detecting ".pgc" files, which are
+ embedded PostgreSQL - they are assumed to be C files (they COULD
+ be C++ instead; while this will affect categorization it
+ won't affect final SLOC counts). Also, if there's a ".c" with
+ a corresponding ".pgc" file, the ".c" file is assumed to be
+ auto-generated.
+ * Thus, SLOCCount now supports embedded database commands for
+ Oracle, Informix, and PostgreSQL. MySQL doesn't use an
+ "embedded" approach, but uses a library approach that SLOCCount
+ could already handle.
+ * Fixed documentation: HTML reserved characters misused,
+ sql_count undocumented.
+
+
+2001-05-14 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added modifications from Gordon Hart to improve detection
+ of Pascal source code files.
+ Pascal files which only have a "unit" in them (not a full program),
+ or have "interface" or "implementation",
+ are now detected as Pascal programs.
+ The original Pascal specification didn't support units, but
+ there are Pascal programs which use them. This should result in
+ more accurate counts of Pascal software that uses units.
+ He also reminded me that Pascal is case-insensitive, spurring a
+ modification in the detection routines (for those who insist on
+	  uppercase keywords... a truly UGLY format, but we need to
+ support it to correctly identify such source code as Pascal).
+ * Modified the documentation to note that I prefer unified diffs.
+ I also added a reference to the TODO file, and from here on
+ I'll post the TODO file separately on my web site.
+
+2001-05-02 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 1.8. Added several features to support
+ measuring programs with embedded database commands.
+	  This includes supporting many Oracle & Informix embedded file types
+ (.pc, .pcc, .pad, .ec, .ecp). It also optionally counts
+ SQL files (.sql) and makefiles (makefile, Makefile, etc.),
+ though by default they are NOT included in lines-of-code counts.
+ See the (new) TODO file for limitations on makefile identification.
+
+2001-04-30 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Per suggestion from Gary Myer, added optional "--addlang" option
+ to add languages not NORMALLY counted. Currently it only
+ supports "makefile" and "sql". The scheme for detecting
+ automatically generated makefiles could use improvement.
+ Normally, makefiles and sql won't be counted in the final reports,
+	  but the front-end will still make the calculations, and their
+	  values will be reported if requested.
+ * Added an "SQL" counter and a "makefile" counter.
+ * Per suggestions from Gary Myer, added detection for files where
+ database commands (Oracle and Informix) are embedded in the code:
+ .pc -> Oracle Preprocessed C code
+ .pcc -> Oracle preprocessed C++ Code
+ .pad -> Oracle preprocessed Ada Code
+ .ec -> Informix preprocessed C code
+ .ecp -> Informix preprocessed C code which calls the C preprocessor
+ before calling the Informix preprocessor.
+	  Handling ".pc" requires heuristics, since many use ".pc" to mean
+	  "stuff about PCs".  Certain filenames (e.g., "makefile.pc" and
+	  "README.pc") are not counted as C files even though they end in ".pc".
+ Note that if you stick C++ code into .pc files, it's counted as C.
+
+ These embedded files are normal source files of the respective
+ language, with database commands stuck into them, e.g.,
+ EXEC SQL select FIELD into :variable from TABLE;
+ which performs a select statement and puts the result into the
+ variable. The database preprocessor simply reads this file,
+ and converts all "EXEC SQL" statements into the appropriate calls
+ and outputs a normal program.
+
+ Currently the "automatically generated" detectors don't detect
+ this case. For the moment, just make sure the generated files
+ aren't around while running SLOCCount.
+
+ Currently the following are not handled (future release?):
+ .pco -> Oracle preprocessed Cobol Code
+ .pfo -> Oracle preprocessed Fortran Code
+ I don't have a Cobol counter. The Fortran counter only works
+ for f77, and I doubt .pfo is limited to that.
+
+
+
+2001-04-27 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Per suggestions from Gary Myer,
+ added ".a" and ".so" to the "not" list, since these are
+ libraries not source, and added the filename "Root" to the
+ "not" file list ("Root" has special meaning to CVS).
+ * Added a note about needing "md5sum" (Gary Myer)
+ * Added a TODO file. If something's on the TODO list that you'd
+ like, please write the code and send it in.
+ * Noted that running on Cygwin is MUCH slower than when running
+	  on Linux. Truth in advertising is only fair.
+
+2001-04-26 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Release version 1.6: the big change is support for running on
+ Windows. Windows users must install Cygwin first.
+ * Modified makefile so that SLOCCount can run on Windows systems
+ if "Cygwin" is installed. The basic modifications to do this
+ were developed by John Clezy -- Thanks!!! I spent time merging
+ his makefile and mine so that a single makefile could be used on
+ both Windows and Unix.
+ * Documented how to install and run SLOCCount on Windows using cygwin.
+ * Changed default prefix to /usr/local; you can set PREFIX to
+ change this, e.g., "make PREFIX=/usr".
+ * When counting a single project, sloccount now also reports
+ "Estimated average number of developers", which is simply
+ the person-months divided by months. As with all estimates, take
+ it with an ocean of salt. This isn't reported for multiproject
+ queries; properly doing this would require "packing" to compensate
+ for the fact that small projects complete before large ones if
+ started simultaneously.
+ * Improved man page (fixed a typo, etc.).
+
+2001-01-10 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Released version 1.4. This is an "ease of use" release,
+ greatly simplifying the installation and use of SLOCCount.
+ The new front-end tool "sloccount" does all the work in one step -
+ now just type "sloccount DIRECTORY" and it's all counted.
+ An RPM makes installation trivial for RPM-based systems.
+ A man page is now available. There are now rules for
+ "make install" and "make uninstall" too.
+ Other improvements include a schedule estimator and options
+ to control the effort and schedule estimators.
+
+2001-01-07 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added an estimator of schedule as well as effort.
+ * Added various options to control the effort and
+ cost estimation: "--effort", "--personcost", "--overhead",
+ and "--schedule".
+ Now people can (through options) control the assumptions made
+ in the effort and cost estimations from the command line.
+ The output now shows the effort estimation model used.
+ * Changed the output slightly to pretty it up and note that
+ it's development EFFORT not TIME that is shown.
+ * Added a note at bottom asking for credit. I don't ask for any
+ money, but I'd like some credit if you refer to the data the
+ tool generates; a gentle reminder in the output seemed like the
+ easiest way to ask for this credit.
+ * Created an RPM package; now RPM-based systems can EASILY
+ install it. It's a relocatable package, so hopefully
+ "alien" can easily translate it to other formats
+ (such as Debian's .deb format).
+ * Created a "man" page for sloccount.
+
+2001-01-06 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Added front-end tool "sloccount", GREATLY improving ease-of-use.
+ The tool "sloccount" invokes all the other SLOCCount tools
+ in the right order, performing a count of a typical project
+ or set of projects. From now on, this is expected to be the
+ "usual" interface, though the pieces will still be documented
+ to help those with more unusual needs.
+ From now on, "SLOCCount" is the entire package, and
+ "sloccount" is this front-end tool.
+ * Added "--datadir" option to make_filelists (to support
+ "sloccount").
+ * get_sloc: No longer displays languages with 0 counts.
+ * Documentation: documented "sloccount"; this caused major changes,
+ since "sloccount" is now the recommended interface for all but
+ those with complicated requirements.
+	* compute_filecount: minor optimization/simplification
+
+2001-01-05 David A. Wheeler <dwheeler, at, dwheeler.com>
+	* Released version 1.2.
+ * Changed the name of many programs, as part of a general clean-up.
+ I changed "compute_all" to "compute_sloc", and eliminated
+	  most of the other "compute_*" files (replacing them with
+ "compute_sloc_lang"). I also changed "get_data" to "get_sloc".
+ This is part of a general clean-up, so that
+ if someone wants to package this program for installation they
+ don't have a thousand tiny programs polluting the namespace.
+ Adding "sloc" to the names makes namespace collisions less likely.
+ I also worked to make the program simpler.
+ * Made a number of documentation fixes - my thanks to Clyde Roby
+ for giving me feedback.
+ * Changed all "*_count" programs to consistently print at the end
+ "Total:" on a line by itself, followed on the next line by
+ the total lines of code all by itself. This makes the new program
+ get_sloc_detail simpler to implement, and also enables
+ get_sloc_detail to perform some error detection.
+ * Changed name of compressed file to ".tar.gz" and modified docs
+ appropriately. The problem is a bug in Netscape 4.7 clients
+ running on Windows; it appears that ".tgz" files don't get fully
+ downloaded from my hosting webserver because no type information
+ is provided. Originally, I tried to change the website to fix this
+ by creating ".htaccess" files, but that didn't work with either:
+ AddEncoding x-gzip gz tgz
+ AddType application/x-tar .tgz
+ or:
+ AddEncoding application/octet-stream tgz
+ So, we'll switch to .tar.gz, which works.
+ My thanks to Christopher Lott for this feedback.
+ * Removed a few garbage files.
+ * Added information to documentation on how to handle HUGE sets
+ of data directory children, i.e., where you can't even use "*"
+ to list the data directory children. I don't have a directory
+ of that kind of scale, so I can't test it directly,
+ but I can at least discuss how to do it; it SHOULD work.
+ * Changed makefile so that "ChangeLog" is now visible on the web.
+
+
+2001-01-04 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * Minor fixes to documentation.
+ * Added "--crossdups" option to break_filelist.
+ * Documented count_unknown_ext.
+ * Created new tool, "get_sloc_detail", and documented it.
+ Now you can get a complete report of all the SLOC data in one big
+ file (e.g., for exporting to another tool for analysis).
+
+2001-01-03 David A. Wheeler <dwheeler, at, dwheeler.com>
+ * First public release, version "1.0", of "SLOCCount".
+ Main website: http://www.dwheeler.com/sloccount
+
diff --git a/PROGRAM_LICENSE b/PROGRAM_LICENSE
new file mode 100644
index 0000000..505faa1
--- /dev/null
+++ b/PROGRAM_LICENSE
@@ -0,0 +1 @@
+GPL
diff --git a/README b/README
new file mode 100644
index 0000000..6b47bea
--- /dev/null
+++ b/README
@@ -0,0 +1,51 @@
+SLOCCount README
+=================
+
+This directory contains "SLOCCount", a set of programs for counting
+source lines of code (SLOC) in large software systems.
+It was developed by David A. Wheeler (dwheeler@dwheeler.com),
+originally to count SLOC in a Linux (GNU/Linux) system, but it can be
+used for counting other software systems.
+
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+For more information, including installation instructions and license
+information, see the HTML file "sloccount.html".
+The file COPYING includes the license, the standard GNU GPL version 2 license.
+Even better, see the website http://www.dwheeler.com/sloccount.
+
+Some of the files contained in this directory aren't normally used -
+instead, they're scrap special-purpose files I used when I created these
+programs. Feel free to use them, but if you're packaging this program,
+be selective - don't install in /usr/bin every executable here!
+Just install the ones documented, plus the executables they depend on.
+
+Packagers: I've developed an RPM spec file and RPM, so RPM systems can
+just load-and-go. If you use a different package format, you may still
+find the spec file helpful for identifying what to load, and the "alien"
+program can apparently translate the RPM file to Debian '.deb' format
+without any problem.
+
+Debian already includes a SLOCCount package.
+SLOCCount 2.08 is available in Debian 3.0.
+For more info on the status of SLOCCount in Debian, see
+http://packages.debian.org/cgi-bin/search_packages.pl?keywords=sloccount&searchon=names&subword=1&version=all&release=all
+
+
diff --git a/SOURCES b/SOURCES
new file mode 100644
index 0000000..33e8392
--- /dev/null
+++ b/SOURCES
@@ -0,0 +1,29 @@
+
+"CodeCount" toolset for counting SLOC.
+ http://sunset.usc.edu/research/CODECOUNT/
+Covers C/C++/Ada/Java and a few others, but
+NOT Python, TCL, Perl, or LISP.
+I used this to count C, C++, and Java code.
+These tools are under a GPL-like license, but it's NOT the GPL.
+See their website for more info.
+
+A huge list of tools is at (mostly big $$):
+ http://www.concentricmc.com/toolsreport/5-3-2tools1.html
+which extracts from this 1995 paper from STSC:
+ http://www.stsc.hill.af.mil/CrossTalk/1995/apr/Metrics.asp
+
+Another list:
+ http://www.qucis.queensu.ca/Software-Engineering/toolcat.html#label181
+
+USC tools (inc. CodeCount) and info on COCOMO II is at:
+ http://sunset.usc.edu/available_tools/availabletools_main.html
+
+
+Software Metrics: An Analysis of the Evolution of COCOMO and Function Points
+Roger E. Masse
+University of Maryland
+July 8, 1997
+http://www.python.org/~rmasse/papers/software-metrics/
+(good overview paper)
+
+
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..efb2a8a
--- /dev/null
+++ b/TODO
@@ -0,0 +1,161 @@
+TODO List:
+
+
+As with all open source projects... if you want something strongly
+enough, then please (1) code it and submit it, or (2) pay me to add it.
+You have the source, you have the power - use it. Or, as has been said for years:
+
+ Use the Source, Luke.
+
+I _do_ listen to user requests, but I cannot do everything myself.
+I've released this program under the GPL _specifically_ so that others
+will help debug and extend it.
+
+
+
+Obviously, a general "TODO" is adding support for other computer languages;
+here are languages I'd like to add support for specifically:
++ Eiffel.
++ Sather (much like Eiffel).
++ CORBA IDL.
++ Forth. Comments can start with "\" (backslash) and continue to end-of-line,
+ or be surrounded by parens. In both cases, they must be on word
+ bounds-- .( is not a comment! Variable names often begin with "\"!
+ For example:
+ : 2dup ( n1 n2 -- n1 n2 n1 n2 ) \ Duplicate two numbers.
+ \ Pronounced: two-dupe.
+ over over ;
+ Strings begin with " (doublequote) or p" (p doublequote, for
+ packed strings), and these must be separate words
+  (e.g., followed by whitespace). They end with a matching ".
+  Also, the ." word begins a string that ends in " (this word immediately
+  prints the given string).
+ Note that "copy is a perfectly legitimate Forth word, and does NOT
+ start a string.
+ Forth sources can be stored as blocks, or as more conventional text.
+ Any way to detect them?
+ See http://www.forth.org/dpans/dpans.html for syntax definition.
+ See also http://www.taygeta.com/forth_style.html
+ and http://www.forth.org/fig.html
++ Create a "javascript" category. ".js" extension, "js" type.
+ (see below for a discussion of the issues with embedded scripts)
++ .pco -> Oracle preprocessed Cobol Code
++ .pfo -> Oracle preprocessed Fortran Code
++ PL/1.
++ BASIC, including Visual Basic, Future Basic, GW-Basic, QBASIC, etc.
++ Improve ocamlyacc support; comments in the yacc part are C-like, but
+  I'm not sure about comment nesting.
+
+ For more language examples, see the ACM "Hello World" project, which tries
+ to collect "Hello World" in every computer language. It's at:
+ http://www2.latech.edu/~acm/HelloWorld.shtml
+
+
+
+Here are other TODOs:
+
+
+* A big one is to add support for logical SLOC, at least for C/C++.
+ Then add support for COCOMO II. Even partial support would be great
+ (e.g., not all languages)... other languages could be displayed as
+ "UNK" (unknown) and be considered 0.
+ Add options to allow display of only one,
+ or of both. See Park's paper, COCOMO II, and Humphrey's 1995 book.
+
+* In general, modify the program so that it ports more easily. Currently,
+ it assumes a Unix-like system (esp. in the shell programs), and it requires
+ md5sum as a separate executable.
+ There are probably some other nonportable constructs, in particular
+ for non-Unix systems (e.g., symlink handling and file/dirnames).
+
+* Rewrite Bourne shell code to either Perl or Python (prob. Python), and
+ make the call to md5sum optional. That way, the program
+ could run on Windows without Cygwin.
+
+* Improve the heuristics for detecting language type.
+ They're actually pretty good already.
+
+* Clean up the program. This was originally written as a one-off program
+ that wouldn't be used again (or distributed!), and it shows.
+
+ The heuristics used to detect language type should
+ be made more modular, so it could be reused in other programs, and
+ so you don't HAVE to write out a list of filenames first if you
+ don't want to.
+
+* Consider rewriting everything not in C into Python. Perl is
+ a write-only language, and it's absurdly hard to read Perl code later.
+ I find Python code much cleaner. And shell isn't as portable.
+
+ One reason I didn't rewrite it in Python is that I had concerns about
+ Python's licensing issues; Python versions 1.6 and up have questionable
+ compatibility with the GPL. Thankfully, the Free Software Foundation (FSF)
+ and the Python developers have worked together, and the Python
+ developers have fixed the license for version 2.0.1 and up.
+ Joy!! I'm VERY happy about this!
+
+* Improve the speed, primarily to support analysis of massive amounts
+ of data. There's a generic routine in Perl; switching that
+ to C would probably help. Perhaps rewriting many of the counters
+ using flex would speed things up, simplify maintenance, and make
+ supporting logical SLOC easier.
+
+* Handle scripts embedded in data.
+ Perhaps create a category, "only the code embedded in HTML"
+ (e.g., Javascript scripts, PHP statements, etc.).
+ This is currently complicated - the whole program assumes that a file
+ can be assigned a specific type, and HTML (etc.) might have multiple
+ languages embedded in it.
+
+* Are any CGI files (.cgi) unhandled? Are files unidentified?
+
+* Improve makefile identification and counting.
+ Currently the program does not identify as makefiles "Imakefile"
+ (generated by xmkmf and processed by imake, used by MIT X server)
+ nor automake/autoconf files (Makefile.am/Makefile.in).
+ Need to handle ".rules" too.
+
+ I didn't just add these files to the "makefile" list, because
+ I have concerns about processing them correctly using the
+ makefile counter. Since most people won't count makefiles anyway,
+ this isn't an issue for most. I welcome patches to change this,
+ _IF_ you ensure that the resulting counts are correct.
+
+  The current version is sufficient for handling programs that have
+ ordinary makefiles that are to be included in the SLOC count when
+ they enable the option to count makefiles.
+
+ Currently the makefiles count "all non-blank lines"; conceivably
+ someone might want to count only the actual directives, not the
+ conditions under which they fire.
+
+* Improve the flexibility in symlink handling; see "make_filelists".
+ It should be rewritten. Some systems don't allow
+ "test"ing for symlinks, which was a portability problem - that problem
+ at least has been removed.
+
+* I've added a few utilities that I use for counting whole Linux systems
+ to the tar file, but they're not installed by the RPM and they're not
+ documented.
+
+* More testing! COBOL in particular is undertested.
+
+* Modify the code, esp. sloccount, to handle systems so large that
+ the data directory list can't be expanded using "*".
+ This would involve using "xargs" in sloccount, maybe getting rid
+ of the separate filelist creation, and having break_filelist
+ call compute_all directly (break_filelist needs to run all the time,
+ or its reloading of hashes during initialization would become the
+ bottleneck). Some of this work has already been done.
+
+* Perl variation support.
+ The code says:
+ open(FH, "-|", "md5sum", $filename) or return undef;
+ but this doesn't work on some Perls.
+ This could be changed to:
+ open(FH, "-|", "md5sum $filename") or return undef;
+ But I dare not fix it that way;
+ imagine a file named "; rm -fr /*" and variations.
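+  One possible way out (an untested sketch, not currently in the code):
+  skip the external md5sum program entirely and compute the checksum
+  with the standard Digest::MD5 module, which avoids shell quoting
+  altogether:
+    use Digest::MD5;
+    open(FH, "<", $filename) or return undef;
+    binmode(FH);                # checksum the raw bytes
+    my $checksum = Digest::MD5->new->addfile(*FH)->hexdigest;
+    close(FH);
+  This would also help the "make the call to md5sum optional" item above.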
+
+
+
diff --git a/TODO.orig b/TODO.orig
new file mode 100644
index 0000000..86c0c20
--- /dev/null
+++ b/TODO.orig
@@ -0,0 +1,153 @@
+TODO List:
+
+
+As with all open source projects... if you want something strongly
+enough, then please (1) code it and submit it, or (2) pay me to add it.
+You have the source, you have the power - use it. Or, as has been said for years:
+
+ Use the Source, Luke.
+
+I _do_ listen to user requests, but I cannot do everything myself.
+I've released this program under the GPL _specifically_ so that others
+will help debug and extend it.
+
+
+
+Obviously, a general "TODO" is adding support for other computer languages;
+here are languages I'd like to add support for specifically:
++ Eiffel.
++ Sather (much like Eiffel).
++ CORBA IDL.
++ Forth. Comments can start with "\" (backslash) and continue to end-of-line,
+ or be surrounded by parens. In both cases, they must be on word
+ bounds-- .( is not a comment! Variable names often begin with "\"!
+ For example:
+ : 2dup ( n1 n2 -- n1 n2 n1 n2 ) \ Duplicate two numbers.
+ \ Pronounced: two-dupe.
+ over over ;
+ Strings begin with " (doublequote) or p" (p doublequote, for
+ packed strings), and these must be separate words
+  (e.g., followed by whitespace). They end with a matching ".
+  Also, the ." word begins a string that ends in " (this word immediately
+  prints the given string).
+ Note that "copy is a perfectly legitimate Forth word, and does NOT
+ start a string.
+ Forth sources can be stored as blocks, or as more conventional text.
+ Any way to detect them?
+ See http://www.forth.org/dpans/dpans.html for syntax definition.
+ See also http://www.taygeta.com/forth_style.html
+ and http://www.forth.org/fig.html
++ Create a "javascript" category. ".js" extension, "js" type.
+ (see below for a discussion of the issues with embedded scripts)
++ .pco -> Oracle preprocessed Cobol Code
++ .pfo -> Oracle preprocessed Fortran Code
++ Fortran beyond Fortran 77 (.f90).
++ PL/1.
++ BASIC, including Visual Basic, Future Basic, GW-Basic, QBASIC, etc.
++ Improve ML/CAML. It uses Pascal-style comments (*..*),
+ double-quoted C-like strings "\n...", and .ml or .mli file extensions
+ (.mli is an interface file for CAML).
+
+ For more language examples, see the ACM "Hello World" project, which tries
+ to collect "Hello World" in every computer language. It's at:
+ http://www2.latech.edu/~acm/HelloWorld.shtml
+
+
+
+Here are other TODOs:
+
+
+* A big one is to add support for logical SLOC, at least for C/C++.
+ Then add support for COCOMO II. Even partial support would be great
+ (e.g., not all languages)... other languages could be displayed as
+ "UNK" (unknown) and be considered 0.
+ Add options to allow display of only one,
+ or of both. See Park's paper, COCOMO II, and Humphrey's 1995 book.
+
+* In general, modify the program so that it ports more easily. Currently,
+ it assumes a Unix-like system (esp. in the shell programs), and it requires
+ md5sum as a separate executable.
+ There are probably some other nonportable constructs, in particular
+ for non-Unix systems (e.g., symlink handling and file/dirnames).
+
+* Rewrite Bourne shell code to either Perl or Python (prob. Python), and
+ make the call to md5sum optional. That way, the program
+ could run on Windows without Cygwin.
+
+* Improve the heuristics for detecting language type.
+ They're actually pretty good already.
+
+* Clean up the program. This was originally written as a one-off program
+ that wouldn't be used again (or distributed!), and it shows.
+
+ The heuristics used to detect language type should
+ be made more modular, so it could be reused in other programs, and
+ so you don't HAVE to write out a list of filenames first if you
+ don't want to.
+
+* Consider rewriting everything not in C into Python. Perl is
+ a write-only language, and it's absurdly hard to read Perl code later.
+ I find Python code much cleaner. And shell isn't as portable.
+
+ One reason I didn't rewrite it in Python is that I had concerns about
+ Python's licensing issues; Python versions 1.6 and up have questionable
+ compatibility with the GPL. Thankfully, the Free Software Foundation (FSF)
+ and the Python developers have worked together, and the Python
+ developers have fixed the license for version 2.0.1 and up.
+ Joy!! I'm VERY happy about this!
+
+* Improve the speed, primarily to support analysis of massive amounts
+ of data. There's a generic routine in Perl; switching that
+ to C would probably help. Perhaps rewriting many of the counters
+ using flex would speed things up, simplify maintenance, and make
+ supporting logical SLOC easier.
+
+* Handle scripts embedded in data.
+ Perhaps create a category, "only the code embedded in HTML"
+ (e.g., Javascript scripts, PHP statements, etc.).
+ This is currently complicated - the whole program assumes that a file
+ can be assigned a specific type, and HTML (etc.) might have multiple
+ languages embedded in it.
+
+* Are any CGI files (.cgi) unhandled? Are files unidentified?
+
+* Improve makefile identification and counting.
+ Currently the program does not identify as makefiles "Imakefile"
+ (generated by xmkmf and processed by imake, used by MIT X server)
+ nor automake/autoconf files (Makefile.am/Makefile.in).
+ Need to handle ".rules" too.
+
+ I didn't just add these files to the "makefile" list, because
+ I have concerns about processing them correctly using the
+ makefile counter. Since most people won't count makefiles anyway,
+ this isn't an issue for most. I welcome patches to change this,
+ _IF_ you ensure that the resulting counts are correct.
+
+  The current version is sufficient for handling programs that have
+ ordinary makefiles that are to be included in the SLOC count when
+ they enable the option to count makefiles.
+
+ Currently the makefiles count "all non-blank lines"; conceivably
+ someone might want to count only the actual directives, not the
+ conditions under which they fire.
+
+* Improve the flexibility in symlink handling; see "make_filelists".
+ It should be rewritten. Some systems don't allow
+ "test"ing for symlinks, which was a portability problem - that problem
+ at least has been removed.
+
+* I've added a few utilities that I use for counting whole Linux systems
+ to the tar file, but they're not installed by the RPM and they're not
+ documented.
+
+* More testing! COBOL in particular is undertested.
+
+* Modify the code, esp. sloccount, to handle systems so large that
+ the data directory list can't be expanded using "*".
+ This would involve using "xargs" in sloccount, maybe getting rid
+ of the separate filelist creation, and having break_filelist
+ call compute_all directly (break_filelist needs to run all the time,
+ or its reloading of hashes during initialization would become the
+ bottleneck). Some of this work has already been done.
+
+
diff --git a/ada_count b/ada_count
new file mode 100755
index 0000000..3204f56
--- /dev/null
+++ b/ada_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
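+# Ada end-of-line comments begin with "--"; generic_count uses that
+# delimiter to count the non-blank, non-comment lines.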
+generic_count '--' "$@"
+
diff --git a/append_license b/append_license
new file mode 100755
index 0000000..4cea6d5
--- /dev/null
+++ b/append_license
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+
+# Given a 3-column list "sloc build-directory-name spec-filename",
+# output a 4-column list which appends the license.
+# You'll need to fix this up afterwards.
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
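+# Directory holding the RPM spec files (Red Hat's default build tree);
+# adjust this if your spec files live elsewhere.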
+$specdir = "/usr/src/redhat/SPECS";
+$garbage = "";
+
+while (<>) {
+ ($sloc, $buildname, $specname, $garbage) = split;
+ chomp($specname);
+ print "$sloc $buildname $specname ";
+
+ if (! (-f "$specdir/$specname")) {
+    die "ERROR. Could not find spec file $specname\n";
+ }
+
+
+ # Get "Copyright:" or "License:"
+ $license = "";
+ $summary = "";
+ open(SPECFILE, "<$specdir/$specname") || die "Can't open $specname\n";
+ while (<SPECFILE>) {
+ # print;
+ if (m/^Summary\:(.*)/i) { $summary = $1; }
+ if (m/^License\:(.*)/i) { $license = $1; }
+ if ((! $license) && (m/^Copyright\:(.*)/i)) { $license = $1; }
+ }
+ close(SPECFILE);
+
+ if ($license) {print "$license";}
+ else {print "?";}
+
+ # print "\t";
+ # print $summary;
+
+ print "\n";
+
+}
diff --git a/append_specname b/append_specname
new file mode 100755
index 0000000..9b8e97c
--- /dev/null
+++ b/append_specname
@@ -0,0 +1,57 @@
+#!/usr/bin/perl -w
+
+# Given a 2-column list "sloc build-directory-name",
+# output a 3-column list which appends the name of the spec file.
+# You'll need to fix this up afterwards.
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+$specdir = "/usr/src/redhat/SPECS";
+$garbage = "";
+
+while (<>) {
+ ($sloc, $buildname, $garbage) = split;
+ chomp($buildname);
+ print "$sloc $buildname ";
+
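+  # Derive two fallback names to try against the spec directory:
+  # $unversioned drops the trailing "-version" ("foo-1.2.3" -> "foo"),
+  # and $reallyshort also drops any trailing digits, dots, dashes,
+  # and underscores ("foo2-1.2.3" -> "foo").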
+ $unversioned = $buildname;
+ $unversioned =~ s/-[^\-]*$//;
+
+ $reallyshort = $buildname;
+ $reallyshort =~ s/[0-9\.\-_]*$//;
+
+
+ if (-f "$specdir/${buildname}.spec") {print "$buildname";}
+ elsif (-f "$specdir/${buildname}.spec.alpha") {print "${buildname}.alpha";}
+ elsif (-f "$specdir/${buildname}.spec.sparc") {print "${buildname}.sparc";}
+ elsif (-f "$specdir/${unversioned}.spec") {print "$unversioned";}
+ elsif (-f "$specdir/${unversioned}.spec.alpha") {print "${unversioned}.alpha";}
+ elsif (-f "$specdir/${unversioned}.spec.sparc") {print "${unversioned}.sparc";}
+ elsif (-f "$specdir/${reallyshort}.spec") {print "$reallyshort";}
+ elsif (-f "$specdir/${reallyshort}.spec.alpha") {print "${reallyshort}.alpha";}
+ elsif (-f "$specdir/${reallyshort}.spec.sparc") {print "${reallyshort}.sparc";}
+ else {print "?";}
+
+ print "\n";
+
+}
diff --git a/asm_count b/asm_count
new file mode 100755
index 0000000..d7ad0b1
--- /dev/null
+++ b/asm_count
@@ -0,0 +1,166 @@
+#!/usr/bin/perl -w
+# asm_count - count physical lines of code in Assembly programs.
+# Usage: asm_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+# This is a trivial/naive program.
+
+# For each file, it looks at the contents to heuristically determine
+# if C comments are permitted and what the "comment" character is.
+# If /* and */ are in the file, then C comments are permitted.
+# The punctuation mark that starts the most lines must be the comment
+# character (but ignoring "/" if C comments are allowed, and
+# ignoring '#' if cpp commands appear to be used)
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ # First, use heuristics to determine the comment char and if it uses C comments
+ $found_c_start = 0;
+ $found_c_end = 0;
+ $cpp_suspicious = 0;
+ $cpp_likely = 0;
+ $cpp_used = 0;
+ %count = ();
+ if ($file eq "") {
+ *CURRENTFILE = *STDIN
+ } else {
+ open(CURRENTFILE, "<$file");
+ }
+ while (<CURRENTFILE>) {
+ if (m!\/\*!) { $found_c_start++;}
+ if (m!\*\/!) { $found_c_end++;}
+ if ( (m!^#\s*define\s!) || (m!^#\s*else!)) {$cpp_suspicious++;}
+ if ( (m!^#\s*ifdef\s!) || (m!^#\s*endif!) || (m!#\s*include!)) {$cpp_likely++;}
+ if (m/^\s*([;!\/#\@\|\*])/) { $count{$1}++; } # Found a likely comment char.
+ }
+  # Done examining the file; let's figure out the parameters.
+ if ($found_c_start && $found_c_end) {
+ $ccomments = 1;
+ $count{'/'} = 0;
+ # $count{'*'} = 0; # Do this to ignore '*' if C comments are used.
+ } else {
+ $ccomments = 0;
+ }
+ if (($cpp_suspicious > 2) || ($cpp_likely >= 1)) {
+ $cpp_used = 1;
+ $count{'#'} = 0;
+ } else {
+ $cpp_used = 0;
+ }
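+  # The comment character is whichever punctuation mark started the most
+  # lines; if none was seen, fall back to ';'.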
+ $likeliest = ';';
+ $likeliest_count = 0;
+ foreach $i (keys(%count)) {
+ # print "DEBUG: key=$i count=$count{$i}\n";
+ if ($count{$i} > $likeliest_count) {
+ $likeliest = $i;
+ $likeliest_count = $count{$i};
+ }
+ }
+ # print "DEBUG: likeliest = $likeliest\n";
+ $commentchar=$likeliest;
+ close(CURRENTFILE);
+
+ # Now count SLOC.
+ $sloc = 0;
+ $isincomment = 0;
+ open(CURRENTFILE, "<$file");
+ while (<CURRENTFILE>) {
+ # We handle C comments first, so that if an EOL-comment
+ # occurs inside a C comment, it's ignored.
+ if ($ccomments) {
+ # Handle C /* */ comments; this will get fooled if they're in strings,
+ # but that would be rare in assembly.
+ while ( (m!\/\*!) || (m!\*\/!)) { # While unprocessed C comment.
+ if ($isincomment) {
+ s!.*?\*\/.*!!;
+ $isincomment = 0;
+ } else { # Not in C comment, but have end comment marker.
+ if (! m/\/\*/) { # Whups, there's no starting marker!
+ print STDERR "Warning: file $file line $. has unmatched comment end\n";
+ # Get us back to a plausible state:
+ s/.*//; # Destroy everything
+ $isincomment = 0;
+ } else {
+ if (! s!\/\*.*?\*\/!!) { # Try to delete whole comment.
+ # We couldn't delete whole comment. Delete what's there.
+ s!\/\*.*!!;
+ $isincomment = 1;
+ }
+ }
+ }
+ }
+ } # End of handling C comments.
+ # This requires $[ be unchanged.
+ $locate_comment = index($_, $commentchar);
+ if ($locate_comment >= 0) { # We found a comment character, delete comment
+ $_ = substr($_, 0, $locate_comment);
+ # print "DEBUG New text: @",$_,"@\n";
+ }
+ # old: s/${commentchar}.*//; # Delete leading comments.
+
+ # FOR DEBUG: print "Finally isincomment=$isincomment line=$_\n";
+ if ((! $isincomment) && (m/\S/)) {$sloc++;}
+ }
+
+ # End-of-file processing
+ print "$sloc (commentchar=$commentchar C-comments=$ccomments) $file\n";
+ $total_sloc += $sloc;
+ $sloc = 0;
+ if ($isincomment) {
+ print STDERR "Missing comment close in file $file\n";
+ }
+}
diff --git a/awk_count b/awk_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/awk_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
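+# awk comments run from "#" to end of line; hand that delimiter to the
+# generic counter.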
+generic_count '#' "$@"
+
diff --git a/break_filelist b/break_filelist
new file mode 100755
index 0000000..7df41ab
--- /dev/null
+++ b/break_filelist
@@ -0,0 +1,1308 @@
+#!/usr/bin/perl -w
+
+# break_filelist
+# Take a list of dirs which contain a "filelist";
+# creates files in each directory identifying which are C, C++, Perl, etc.
+# For example, "ansic.dat" lists all ANSI C files contained in filelist.
+# Note: ".h" files are ambiguous (they could be C or C++); the program
+# uses heuristics to determine this.
+# The list of .h files is also contained in h_list.dat.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+
+# If adding a new language: add the logic to open the file,
+# close the file, and detect & write to the file listing that language.
+
+# Debatable decisions:
+# Doesn't count .dsl files (stylesheets, which are partially LISP).
+# Doesn't count .sql files (SQL queries & commands)
+
+# Note - I don't try to distinguish between TCL and [incr TCL] (itcl),
+# an OO extended version of TCL. For our purposes, it's all TCL.
+
+
+use FileHandle;
+
+
+# Set default configuration:
+
+$duplicates_okay = 0; # Set to 1 if you want to count file duplicates.
+$crossdups_okay = 0; # Set to 1 if duplicates okay in different filelists.
+$autogen_okay = 0; # Set to 1 if you want to count autogen'ed files.
+$noisy = 0; # Set to 1 if you want noisy reports.
+%lang_list_files = ();
+
+# The following extensions are NOT code:
+%not_code_extensions = (
+ "html" => 1,
+ "in" => 1, # Debatable.
+ "xpm" => 1,
+ "po" => 1,
+ "am" => 1, # Debatable.
+ "1" => 1, # Man pages (documentation):
+ "2" => 1,
+ "3" => 1,
+ "4" => 1,
+ "5" => 1,
+ "6" => 1,
+ "7" => 1,
+ "8" => 1,
+ "9" => 1,
+ "n" => 1,
+ "gif" => 1,
+ "tfm" => 1,
+ "png" => 1,
+ "m4" => 1, # Debatable.
+ "bdf" => 1,
+ "sgml" => 1,
+ "mf" => 1,
+ "txt" => 1, "text" => 1,
+ "man" => 1,
+ "xbm" => 1,
+ "Tag" => 1,
+ "sgm" => 1,
+ "vf" => 1,
+ "tex" => 1,
+ "elc" => 1,
+ "gz" => 1,
+ "dic" => 1,
+ "pfb" => 1,
+ "fig" => 1,
+ "afm" => 1, # font metrics
+ "jpg" => 1,
+ "bmp" => 1,
+ "htm" => 1,
+ "kdelnk" => 1,
+ "desktop" => 1,
+ "pbm" => 1,
+ "pdf" => 1,
+ "ps" => 1, # Postscript is _USUALLY_ generated automatically.
+ "eps" => 1,
+ "doc" => 1,
+ "man" => 1,
+ "o" => 1, # Object code is generated from source code.
+ "a" => 1, # Static object code.
+ "so" => 1, # Dynamically-loaded object code.
+ "Y" => 1, # file compressed with "Yabba"
+ "Z" => 1, # file compressed with "compress"
+ "ad" => 1, # X application default resource file.
+ "arc" => 1, # arc(1) archive
+ "arj" => 1, # arj(1) archive
+  "au" => 1,     # Audio sound file
+ "wav" => 1,
+ "bak" => 1, # Backup files - we only want to count the "real" files.
+ "bz2" => 1, # bzip2(1) compressed file
+  "mp3" => 1,    # MP3 audio file
+ "tgz" => 1, # tarball
+ "zip" => 1, # zip archive
+);
+
+# The following filenames are NOT code:
+%not_code_filenames = (
+ "README" => 1,
+ "Readme" => 1,
+ "readme" => 1,
+ "README.tk" => 1, # used in kdemultimedia, it's confusing.
+ "Changelog" => 1,
+ "ChangeLog" => 1,
+ "Repository" => 1,
+ "CHANGES" => 1,
+ "Changes" => 1,
+ ".cvsignore" => 1,
+ "Root" => 1, # CVS.
+ "BUGS" => 1,
+ "TODO" => 1,
+ "COPYING" => 1,
+ "MAINTAINERS" => 1,
+ "Entries" => 1,
+ # Skip "iconfig.h" files; they're used in Imakefiles
+ # (used in xlockmore):
+ "iconfig.h" => 1,
+);
+
+
+# A filename ending in the following extensions usually maps to the
+# given language:
+
+# TODO: See suffixes(7)
+# .al Perl autoload file
+# .am automake input
+
+%file_extensions = (
+ "c" => "ansic",
+ "ec" => "ansic", # Informix C.
+ "ecp" => "ansic", # Informix C.
+ "pgc" => "ansic", # Postgres embedded C/C++ (guess C)
+ "C" => "cpp", "cpp" => "cpp", "cxx" => "cpp", "cc" => "cpp",
+ "pcc" => "cpp", # Input to Oracle C++ preproc.
+ "m" => "objc",
+ # C# (C-sharp) is named 'cs', not 'c#', because
+ # the '#' is a comment character and I'm trying to
+ # avoid bug-prone conventions.
+ # C# doesn't support header files.
+ "cs" => "cs",
+ # Header files are allocated to the "h" language, and then
+ # copied to the correct location later so that C/C++/Objective-C
+ # can be separated.
+ "h" => "h", "H" => "h", "hpp" => "h", "hh" => "h",
+ "ada" => "ada", "adb" => "ada", "ads" => "ada",
+ "pad" => "ada", # Oracle Ada preprocessor.
+ "f" => "fortran", "F" => "fortran", # This catches "wokka.F" as Fortran.
+ # Warning: "Freeze" format also uses .f. Haven't heard of problems,
+ # freeze is extremely rare and even more rare in source code directories.
+ "f77" => "fortran", "F77" => "fortran",
+ "f90" => "f90", "F90" => "f90",
+ "cob" => "cobol", "cbl" => "cobol",
+ "COB" => "cobol", "CBL" => "cobol", # Yes, people do create wokka.CBL files
+ "p" => "pascal", "pas" => "pascal", "pp" => "pascal", "dpr" => "pascal",
+ "py" => "python",
+ "s" => "asm", "S" => "asm", "asm" => "asm",
+ "sh" => "sh", "bash" => "sh",
+ "csh" => "csh", "tcsh" => "csh",
+ "java" => "java",
+ "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "sc" => "lisp",
+ "lsp" => "lisp", "cl" => "lisp",
+ "jl" => "lisp",
+ "tcl" => "tcl", "tk" => "tcl", "itk" => "tcl",
+ "exp" => "exp",
+ "pl" => "perl", "pm" => "perl", "perl" => "perl", "ph" => "perl",
+ "awk" => "awk",
+ "sed" => "sed",
+ "y" => "yacc",
+ "l" => "lex",
+ "makefile" => "makefile",
+ "sql" => "sql",
+ "php" => "php", "php3" => "php", "php4" => "php", "php5" => "php",
+ "php6" => "php",
+ "inc" => "inc", # inc MAY be PHP - we'll handle it specially.
+ "m3" => "modula3", "i3" => "modula3",
+ "mg" => "modula3", "ig" => "modula3",
+ "ml" => "ml", "mli" => "ml",
+ "mly" => "ml", # ocamlyacc. In fact this is half-yacc half-ML, especially
+ # comments in yacc part are C-like, not ML like.
+ "mll" => "ml", # ocamllex, no such problems as in ocamlyacc
+ "rb" => "ruby",
+ "hs" => "haskell", "lhs" => "haskell",
+ # ???: .pco is Oracle Cobol
+ "jsp" => "jsp", # Java server pages
+);
+
+
+# GLOBAL VARIABLES
+
+$dup_count = 0;
+
+$warning_from_first_line = "";
+
+%examined_directories = (); # Keys = Names of directories examined this run.
+
+$duplistfile = "";
+
+###########
+
+
+# Handle re-opening individual CODE_FILEs.
+# CODE_FILE is public
+
+# Private value:
+$opened_file_name = "";
+
+sub reopen {
+ # Open file if it isn't already, else rewind.
+ # If filename is "", close any open file.
+ my $filename = shift;
+ chomp($filename);
+ # print("DEBUG: reopen($filename)\n");
+ if ($filename eq "") {
+ if ($opened_file_name) {close(CODE_FILE);}
+ $opened_file_name = "";
+ return;
+ }
+ if ($filename eq $opened_file_name) {
+ seek CODE_FILE, 0, 0; # Rewind.
+ } else { # We're opening a new file.
+ if ($opened_file_name) {close(CODE_FILE)}
+ open(CODE_FILE, "<$filename\0") || die "Can't open $filename";
+ $opened_file_name = $filename;
+ }
+}
+
+###########
+
+sub looks_like_cpp {
+  # Returns a confidence level - does the file look like it's C++?
+ my $filename = shift;
+ my $confidence = 0;
+ chomp($filename);
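+  # Score 2 if a line opens a class body ("class ... {"), 1 if a line
+  # merely starts with "class", 0 otherwise.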
+ open( SUSPECT, "<$filename");
+ while (defined($_ = <SUSPECT>)) {
+ if (m/^\s*class\b.*\{/) { # "}"
+ close(SUSPECT);
+ return 2;
+ }
+ if (m/^\s*class\b/) {
+ $confidence = 1;
+ }
+ }
+ close(SUSPECT);
+ return $confidence;
+}
+
+
+# Cache which files are objective-C or not.
+# Key is the full file pathname; value is 1 if objective-C (else 0).
+%objective_c_files = ();
+
+sub really_is_objc {
+# Given filename, returns TRUE if its contents really are objective-C.
+ my $filename = shift;
+ chomp($filename);
+
+ my $is_objc = 0; # Value to determine.
+ my $brace_lines = 0; # Lines that begin/end with curly braces.
+ my $plus_minus = 0; # Lines that begin with + or -.
+ my $word_main = 0; # Did we find "main("?
+ my $special = 0; # Did we find a special Objective-C pattern?
+
+ # Return cached result, if available:
+ if ($objective_c_files{$filename}) { return $objective_c_files{$filename};}
+
+ open(OBJC_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's objective C.\n";
+ while(<OBJC_FILE>) {
+
+ if (m/^\s*[{}]/ || m/[{}];?\s*$/) { $brace_lines++;}
+ if (m/^\s*[+-]/) {$plus_minus++;}
+ if (m/\bmain\s*\(/) {$word_main++;} # "main" followed by "("?
+ # Handle /usr/src/redhat/BUILD/egcs-1.1.2/gcc/objc/linking.m:
+ if (m/^\s*\[object name\];\s*$/i) {$special=1;}
+ }
+ close(OBJC_FILE);
+
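+  # Declare it Objective-C if it has several brace lines plus either
+  # method definitions (lines starting with + or -), a main(), or the
+  # special pattern above.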
+ if (($brace_lines > 1) && (($plus_minus > 1) || $word_main || $special))
+ {$is_objc = 1;}
+
+ $objective_c_files{$filename} = $is_objc; # Store result in cache.
+
+ return $is_objc;
+}
+
+
+# Cache which files are lex or not.
+# Key is the full file pathname; value is 1 if lex (else 0).
+%lex_files = ();
+
+sub really_is_lex {
+# Given filename, returns TRUE if its contents really is lex.
+# lex file must have "%%", "%{", and "%}".
+# In theory, a lex file doesn't need "%{" and "%}", but in practice
+# they all have them, and requiring them avoid mislabeling a
+# non-lexfile as a lex file.
+
+ my $filename = shift;
+ chomp($filename);
+
+ my $is_lex = 0; # Value to determine.
+ my $percent_percent = 0;
+ my $percent_opencurly = 0;
+ my $percent_closecurly = 0;
+
+ # Return cached result, if available:
+ if ($lex_files{$filename}) { return $lex_files{$filename};}
+
+ open(LEX_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's lex.\n";
+ while(<LEX_FILE>) {
+ $percent_percent++ if (m/^\s*\%\%/);
+ $percent_opencurly++ if (m/^\s*\%\{/);
+ $percent_closecurly++ if (m/^\s*\%\}/);
+ }
+ close(LEX_FILE);
+
+ if ($percent_percent && $percent_opencurly && $percent_closecurly)
+ {$is_lex = 1;}
+
+ $lex_files{$filename} = $is_lex; # Store result in cache.
+
+ return $is_lex;
+}
+
+
+# Cache which files are expect or not.
+# Key is the full file pathname; value is 1 if it is (else 0).
+%expect_files = ();
+
+sub really_is_expect {
+# Given filename, returns TRUE if its contents really are Expect.
+# Many "exp" files (such as in Apache and Mesa) are just "export" data,
+# summarizing something else (e.g., its interface).
+# Sometimes (like in RPM) it's just misc. data.
+# Thus, we need to look at the file to determine
+# if it's really an "expect" file.
+
+ my $filename = shift;
+ chomp($filename);
+
+# The heuristic is as follows: it's Expect _IF_ it:
+# 1. has "load_lib" command and either "#" comments or {}.
+# 2. {, }, and one of: proc, if, [...], expect
+
+ my $is_expect = 0; # Value to determine.
+
+ my $begin_brace = 0; # Lines that begin with curly braces.
+ my $end_brace = 0; # Lines that begin with curly braces.
+ my $load_lib = 0; # Lines with the Load_lib command.
+ my $found_proc = 0;
+ my $found_if = 0;
+ my $found_brackets = 0;
+ my $found_expect = 0;
+ my $found_pound = 0;
+
+ # Return cached result, if available:
+  if ($expect_files{$filename}) { return $expect_files{$filename};}
+
+ open(EXPECT_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's expect.\n";
+ while(<EXPECT_FILE>) {
+
+ if (m/#/) {$found_pound++; s/#.*//;}
+ if (m/^\s*\{/) { $begin_brace++;}
+ if (m/\{\s*$/) { $begin_brace++;}
+ if (m/^\s*\}/) { $end_brace++;}
+ if (m/\};?\s*$/) { $end_brace++;}
+ if (m/^\s*load_lib\s+\S/) { $load_lib++;}
+ if (m/^\s*proc\s/) { $found_proc++;}
+ if (m/^\s*if\s/) { $found_if++;}
+ if (m/\[.*\]/) { $found_brackets++;}
+ if (m/^\s*expect\s/) { $found_expect++;}
+ }
+ close(EXPECT_FILE);
+
+ if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
+ {$is_expect = 1;}
+ if ( $begin_brace && $end_brace &&
+ ($found_proc || $found_if || $found_brackets || $found_expect))
+ {$is_expect = 1;}
+
+ $expect_files{$filename} = $is_expect; # Store result in cache.
+
+ return $is_expect;
+}
+
+
+# Cached values.
+%pascal_files = ();
+
+sub really_is_pascal {
+# Given filename, returns TRUE if its contents really are Pascal.
+
+# This isn't as obvious as it seems.
+# Many ".p" files are Perl files
+# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
+# others are C extractions
+# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
+# and some files in linuxconf).
+# However, test files in "p2c" really are Pascal, for example.
+
+# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
+# is actually C code. The heuristics determine that they're not Pascal,
+# but because it ends in ".p" it's not counted as C code either.
+# I believe this is actually correct behavior, because frankly it
+# looks like it's automatically generated (it's a bitmap expressed as code).
+# Rather than guess otherwise, we don't include it in a list of
+# source files. Let's face it, someone who creates C files ending in ".p"
+# and expects them to be counted by default as C files in SLOCCount needs
+# their head examined. I suggest examining their head
+# with a sucker rod (see syslogd(8) for more on sucker rods).
+
+# This heuristic counts as Pascal files such as:
+# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
+# which is hand-generated. We don't count woven documents now anyway,
+# so this is justifiable.
+
+ my $filename = shift;
+ chomp($filename);
+
+# The heuristic is as follows: it's Pascal _IF_ it has all of the following
+# (ignoring {...} and (*...*) comments):
+# 1. "^..program NAME" or "^..unit NAME",
+# 2. "procedure", "function", "^..interface", or "^..implementation",
+# 3. a "begin", and
+# 4. it ends with "end.",
+#
+# Or it has all of the following:
+# 1. "^..module NAME" and
+# 2. it ends with "end.".
+#
+# Or it has all of the following:
+# 1. "^..program NAME",
+# 2. a "begin", and
+# 3. it ends with "end.".
+#
+# The "end." requirements in particular filter out non-Pascal.
+#
+# Note (jgb): this does not detect Pascal main files in fpc, like
+# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
+# it
+
+ my $is_pascal = 0; # Value to determine.
+
+ my $has_program = 0;
+ my $has_unit = 0;
+ my $has_module = 0;
+ my $has_procedure_or_function = 0;
+ my $has_begin = 0;
+ my $found_terminating_end = 0;
+
+ # Return cached result, if available:
+ if ($pascal_files{$filename}) { return $pascal_files{$filename};}
+
+ open(PASCAL_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's pascal.\n";
+ while(<PASCAL_FILE>) {
+ s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
+ s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
+ if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;}
+ if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;}
+ if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;}
+ if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
+ if (m/\bfunction\b/i) { $has_procedure_or_function = 1; }
+ if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; }
+ if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
+ if (m/\bbegin\b/i) { $has_begin = 1; }
+ # Originally I said:
+ # "This heuristic fails if there are multi-line comments after
+ # "end."; I haven't seen that in real Pascal programs:"
+ # But jgb found there are a good quantity of them in Debian, specially in
+ # fpc (at the end of a lot of files there is a multiline comment
+ # with the changelog for the file).
+ # Therefore, assume Pascal if "end." appears anywhere in the file.
+ if (m/end\.\s*$/i) {$found_terminating_end = 1;}
+# elsif (m/\S/) {$found_terminating_end = 0;}
+ }
+ close(PASCAL_FILE);
+
+ # Okay, we've examined the entire file looking for clues;
+ # let's use those clues to determine if it's really Pascal:
+
+ if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
+ $has_begin && $found_terminating_end ) ||
+ ( $has_module && $found_terminating_end ) ||
+ ( $has_program && $has_begin && $found_terminating_end ) )
+ {$is_pascal = 1;}
+
+ $pascal_files{$filename} = $is_pascal; # Store result in cache.
+
+ return $is_pascal;
+}
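+
+# Illustrative sketch (hypothetical file, an assumption): the smallest shape
+# passing the third rule above ("program", a "begin", and a final "end.") is:
+#   program hello;
+#   begin
+#     writeln('hello')
+#   end.
+# A C extraction that never reaches "end." is rejected even if named "*.p".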
+
+sub really_is_incpascal {
+# Given filename, returns TRUE if its contents really are Pascal.
+# For .inc files (mainly seen in fpc)
+
+ my $filename = shift;
+ chomp($filename);
+
+# The heuristic is as follows: it is Pascal if any of the following holds:
+# 1. really_is_pascal returns true
+# 2. Any usual reserved word is found (program, unit, const, begin...)
+
+ # If the general routine for Pascal files works, we have it
+ if (&really_is_pascal ($filename)) {
+ $pascal_files{$filename} = 1;
+ return 1;
+ }
+
+ my $is_pascal = 0; # Value to determine.
+ my $found_begin = 0;
+
+ open(PASCAL_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's pascal.\n";
+ while(<PASCAL_FILE>) {
+ s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
+ s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
+ if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;}
+ if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;}
+ if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;}
+ if (m/\bprocedure\b/i) {$is_pascal = 1; }
+ if (m/\bfunction\b/i) {$is_pascal = 1; }
+ if (m/^\s*interface\s+/i) {$is_pascal = 1; }
+ if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
+ if (m/\bconstant\s+/i) {$is_pascal=1;}
+ if (m/\bbegin\b/i) { $found_begin = 1; }
+ if ((m/end\.\s*$/i) && ($found_begin == 1)) {$is_pascal = 1;}
+ if ($is_pascal) {
+ last;
+ }
+ }
+
+ close(PASCAL_FILE);
+ $pascal_files{$filename} = $is_pascal; # Store result in cache.
+ return $is_pascal;
+}
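+
+# Illustrative sketch (hypothetical "consts.inc", an assumption): include
+# files often lack "program"/"begin...end.", so one reserved word suffices
+# here; a line such as
+#   procedure InitTables; forward;
+# makes really_is_incpascal() return 1 via the "procedure" test above.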
+
+# Cache which files are php or not.
+# Key is the full file pathname; value is 1 if it is (else 0).
+%php_files = ();
+
+sub really_is_php {
+# Given filename, returns TRUE if its contents really are PHP.
+
+ my $filename = shift;
+ chomp($filename);
+
+ my $is_php = 0; # Value to determine.
+ # Need to find a matching pair of surrounds, with ending after beginning:
+ my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?>
+ my $script_surround = 0; # <script..>; bit 0 = <script language="php">, bit 1 = </script>
+ my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %>
+
+ # Return cached result, if available:
+ if ($php_files{$filename}) { return $php_files{$filename};}
+
+ open(PHP_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's php.\n";
+ while(<PHP_FILE>) {
+ if (m/\<\?/) { $normal_surround |= 1; }
+ if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
+ if (m/\<script.*language="?php"?/i) { $script_surround |= 1; }
+ if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
+ if (m/\<\%/) { $asp_surround |= 1; }
+ if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
+ }
+ close(PHP_FILE);
+
+ if ( ($normal_surround == 3) || ($script_surround == 3) ||
+ ($asp_surround == 3)) {
+ $is_php = 1;
+ }
+
+ $php_files{$filename} = $is_php; # Store result in cache.
+
+ return $is_php;
+}
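+
+# Illustrative sketch (hypothetical "header.inc", an assumption): the test
+# above needs an opening marker and, on the same or a later line, its
+# matching closer, so
+#   <?php echo "hi"; ?>
+# is accepted, while a lone "<?" with no "?>" anywhere leaves
+# $normal_surround at 1 and the file is not treated as PHP.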
+
+
+
+sub examine_dir {
+ # Given a file, determine if there are only C++, OBJC, C, or a mixture
+ # in the same directory. Returns "c", "cpp", "objc", or "mix".
+ my $filename = shift;
+ chomp($filename);
+ my $dirname = $filename;
+ $dirname =~ s/\/[^\/]*$//;
+ my $saw_ansic_in_dir = 0;
+ my $saw_pc_in_dir = 0; # ".pc" may mean Oracle C.
+ my $saw_pcc_in_dir = 0; # ".pcc" may mean Oracle C++.
+ my $saw_cpp_in_dir = 0;
+ my $saw_objc_in_dir = 0;
+ opendir(DIR, $dirname) || die "can't opendir $dirname";
+ while (defined($_ = readdir(DIR))) {
+ chomp;
+ next if (!$_);
+ if (m/\.(cpp|C|cxx|cc)$/ && -f "$dirname/$_") {$saw_cpp_in_dir = 1;}
+ if (m/\.c$/ && -f "$dirname/$_") {$saw_ansic_in_dir = 1;}
+ if (m/\.pc$/ && -f "$dirname/$_") {$saw_pc_in_dir = 1;}
+ if (m/\.pcc$/ && -f "$dirname/$_") {$saw_pcc_in_dir = 1;}
+ if (m/\.m$/ && -f "$dirname/$_" && &really_is_objc($dirname . "/" . $_))
+ {$saw_objc_in_dir = 1;}
+ if (($saw_ansic_in_dir + $saw_cpp_in_dir + $saw_objc_in_dir) > 1) {
+ closedir(DIR);
+ return "mix";
+ }
+ }
+ # Done searching; we saw at most one type.
+ if ($saw_ansic_in_dir) {return "c";}
+ elsif ($saw_cpp_in_dir) {return "cpp";}
+ elsif ($saw_objc_in_dir) {return "objc";}
+ elsif ($saw_pc_in_dir && (!$saw_pcc_in_dir)) {return "c";} # Guess "C".
+ elsif ($saw_pcc_in_dir && (!$saw_pc_in_dir)) {return "cpp";} # Guess "C++".
+ else {return "mix";} # We didn't see anything... so let's say "mix".
+}
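+
+# Usage sketch (hypothetical path, an assumption): for an ambiguous header
+# such as "src/widget.h", examine_dir("src/widget.h") looks at what else
+# lives in "src/"; a directory containing only "*.cc" files yields "cpp",
+# which convert_h_files() below uses as a tie-breaker.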
+
+sub was_generated_automatically {
+ # Determine if the file was generated automatically.
+ # Use a simple heuristic: check if first few lines have phrases like
+ # "generated automatically", "automatically generated", "Generated by",
+ # or "do not edit" as the first
+ # words in the line (after possible comment markers and spaces).
+ my $filename = shift;
+
+ if ($autogen_okay) {return 0;};
+
+ chomp($filename);
+ reopen($filename);
+ $i = 15; # Look at first 15 lines.
+ while (defined($_ = <CODE_FILE>)) {
+ if (m/^[\s#\/\*;\-\%]*generated automatically/i ||
+ m/^[\s#\/\*;\-\%]*automatically generated/i ||
+ m/^[\s#\/\*;\-\%]*generated by /i || # libtool uses this.
+ m/^[\s#\/\*;\-\%]*a lexical scanner generated by flex/i ||
+ m/^[\s#\/\*;\-\%]*this is a generated file/i || # TeTex uses this.
+ m/^[\s#\/\*;\-\%]*generated with the.*utility/i || # TeTex uses this.
+ m/^[\s#\/\*;\-\%]*do not edit/i) {
+ return 1;
+ }
+ $i--;
+ last if $i <= 0;
+ }
+ return 0;
+}
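+
+# Illustrative sketch (assumed example, not from the original documentation):
+# a file whose first 15 lines include something like
+#   /* Generated automatically by configure; do not edit. */
+# is routed to the "auto" list, unless --autogen was given on the command line.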
+
+
+# Previous files added, indexed by digest:
+
+%previous_files = ();
+
+$cached_digest = "";
+$cached_digest_filename = "";
+
+$digest_method = undef;
+
+sub compute_digest_given_method {
+ my $filename = shift;
+ my $method = shift;
+ my $result;
+
+ if ($method eq "md5sum") {
+ open(FH, "-|", "md5sum", $filename) or return undef;
+ $result = <FH>;
+ close FH;
+ return undef if ! defined($result);
+ chomp($result);
+ $result =~ s/^\s*//; # Not needed for GNU Textutils.
+ $result =~ s/[^a-fA-F0-9].*//; # Strip away end.
+ } elsif ($method eq "md5") {
+ open(FH, "-|", "md5", $filename) or return undef;
+ $result = <FH>;
+ close FH;
+ return undef if ! defined($result);
+ chomp($result);
+ $result =~ s/^.* //; # Strip away beginning.
+ } elsif ($method eq "openssl") {
+ open(FH, "-|", "openssl", "dgst", "-md5", $filename) or return undef;
+ $result = <FH>;
+ close FH;
+ return undef if ! defined($result);
+ chomp($result);
+ $result =~ s/^.* //; # Strip away beginning.
+ } else {
+ # "Can't happen"
+ die "Unknown method";
+ }
+ return $result;
+}
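+
+# Format notes (based on the typical output of these tools; treat this as an
+# assumption rather than a guarantee for every version):
+#   md5sum FILE            ->  d41d8...27e  FILE         (keep the leading hex)
+#   md5 FILE               ->  MD5 (FILE) = d41d8...27e  (keep text after last space)
+#   openssl dgst -md5 FILE ->  MD5(FILE)= d41d8...27e    (keep text after last space)
+# which is why the three branches above trim the result differently.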
+
+sub compute_digest {
+ my $filename = shift;
+ my $result;
+ if (defined($digest_method)) {
+ $result = compute_digest_given_method($filename, $digest_method);
+ } else {
+ # Try each method in turn until one works.
+ # There doesn't seem to be a way in perl to disable an error message
+ # display if the command is missing, which is annoying. However, the
+ # program is more robust if we check for the command each time we run.
+ print "Finding a working MD5 command....\n";
+ foreach $m ("md5sum", "md5", "openssl") {
+ $result = compute_digest_given_method($filename, $m);
+ if (defined($result)) {
+ $digest_method = $m;
+ last;
+ }
+ }
+ if (!defined($digest_method)) {
+ die "Failure - could not find a working md5 program using $filename.";
+ }
+ print "Found a working MD5 command.\n";
+ }
+ return $result;
+}
+
+sub get_digest {
+ my $filename = shift;
+ my $result;
+ # First, check the cache -- did we just compute this?
+ if ($filename eq $cached_digest_filename) {
+ return $cached_digest; # We did, so here's what it was.
+ }
+
+ $result = compute_digest($filename);
+ # Store in most-recently-used cache.
+ $cached_digest = $result;
+ $cached_digest_filename = $filename;
+ return $result;
+}
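+
+# Usage sketch (hypothetical path, an assumption):
+#   my $d1 = &get_digest("src/foo.c");  # runs the external MD5 command
+#   my $d2 = &get_digest("src/foo.c");  # served from the one-entry cache
+# record_file_type() below relies on exactly this back-to-back pattern,
+# calling get_digest() once via already_added() and once when storing.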
+
+sub already_added {
+ # returns the first file's name with the same contents,
+ # else returns the empty string.
+
+ my $filename = shift;
+ my $digest = &get_digest($filename);
+
+ if ($previous_files{$digest}) {
+ return $previous_files{$digest};
+ } else {
+ return "";
+ }
+}
+
+sub close_lang_lists {
+ my $lang;
+ my $file;
+ while (($lang, $file) = each(%lang_list_files)) {
+ $file->close(); # Ignore any errors on close, there's little we can do.
+ }
+ %lang_list_files = ();
+}
+
+sub force_record_file_type {
+ my ($filename, $type) = @_;
+
+ if (!$type) {die "ERROR! No type given for file $filename\n";}
+ if ($type eq "c") {$type = "ansic";};
+ if (!defined($lang_list_files{$type})) {
+ $lang_list_files{$type} = new FileHandle("${dir}/${type}_list.dat", "w") ||
+ die "Could not open ${dir}/${type}_list.dat";
+ }
+ $lang_list_files{$type}->printf("%s\n", $filename);
+}
+
+
+sub record_file_type {
+ my ($filename, $type) = @_;
+ # First check if the file should be auto, dup, or zero - and add there
+ # if so. Otherwise, add to record of 'type'.
+
+ my $first_filename;
+
+ if (-z $filename) {
+ force_record_file_type($filename, "zero");
+ return;
+ }
+
+ if (&was_generated_automatically($filename)) {
+ force_record_file_type($filename, "auto");
+ return;
+ }
+
+ unless (($duplicates_okay) || ($type eq "not") || ($type eq "unknown")) {
+ $first_filename = &already_added($filename);
+ if ($first_filename) {
+ print "Note: $filename dups $first_filename\n" if $noisy;
+ force_record_file_type("$filename dups $first_filename", "dup");
+ $dup_count++;
+ return;
+ } else { # This isn't a duplicate - record that info, as needed.
+ my $digest = &get_digest($filename);
+ $previous_files{$digest} = $filename;
+ if ($duplistfile) {
+ print DUPLIST "$digest $filename\n";
+ }
+ }
+ }
+
+ force_record_file_type($filename, $type);
+}
+
+
+
+sub file_type_from_contents {
+ # Determine if file type is a scripting language, and if so, return it.
+ # Returns its type as a string, or the empty string if it's undetermined.
+ my $filename = shift;
+ my $command;
+ chomp($filename);
+ reopen($filename);
+ # Don't do $firstline = <CODE_FILE> here because the file may be binary;
+ # instead, read in a fixed number of bytes:
+ read CODE_FILE, $firstline, 200;
+ return "" if (!defined($firstline));
+ chomp($firstline);
+ if (!$firstline) {return "";}
+
+ # Handle weirdness: If there's a ".cpp" file beginning with .\"
+ # then it clearly isn't C/C++... it's a man page. People who create
+ # and distribute man pages with such filename extensions should have
+ # a fingernail removed, slowly :-).
+ if (($firstline =~ m@^[,.]\\"@) &&
+ $filename =~ m@\.(c|cpp|C|cxx|cc)$@) {return "not";}
+
+
+ if (!($firstline =~ m@^#!@)) {return "";} # No script indicator here.
+
+ # studying $firstline doesn't speed things up, unfortunately.
+
+ # I once used a pattern that only acknowledged very specific directories,
+ # but I found that many test cases use unusual script locations
+ # (to ensure that they're invoking the correct program they're testing).
+ # Thus, we depend on the program name being followed by whitespace,
+ # and being given either by itself or preceded by a series of lowercase
+ # directories ending in "/".
+
+ # I developed these patterns by starting with patterns that appeared
+ # correct, and then examined the output (esp. warning messages) to see
+ # what I'd missed.
+
+ $command = "";
+
+ # Strip out any calls to sudo
+ if ($firstline =~ m@^#!\s*/(usr/)?bin/sudo\s+(/.*)@) {
+ $firstline = "#!" . $2;
+ }
+
+ if ($firstline =~ m@^#!\s*/(usr/)?bin/env\s+([a-zA-Z0-9\._]+)(\s|\Z)@i) {
+ $command = $2;
+ } elsif ($firstline =~ m@^#!\s*([a-zA-Z0-9\/\.]+\/)?([a-zA-Z0-9\._]+)(\s|\Z)@) {
+ $command = $2;
+ }
+
+ if ( ($command =~ m/^(bash|ksh|zsh|pdksh|sh)[0-9\.]*(\.exe)?$/i) ||
+ ($firstline =~
+ m~^#!\s*\@_?(SCRIPT_)?(PATH_)?(BA|K)?SH(ELL)?(\d+)?\@?(\s|\Z)~)) {
+ # Note: wish(1) uses a funny trick; see wish(1) for more info.
+ # The following code detects this unusual wish convention.
+ if ($firstline =~ m@exec wish(\s|\Z)@i) {
+ return "tcl"; # return the type for wish.
+ }
+ # Otherwise, it's shell.
+ return "sh";
+ }
+ if ( ($command =~ m/^(t?csh\d*)[0-9\.]*(\.exe)?$/i) ||
+ ($firstline =~ m@^#!\s*xCSH_PATHx(\s|\Z)@)) {
+ return "csh";
+ }
+ if ( ($command =~ m/^(mini)?perl[0-9\.]*(\.exe)?$/i) ||
+ ($command =~ m/^speedycgi[0-9\.]*(\.exe)?$/i) ||
+ ($firstline =~ m~^#!\s*\@_?(PATH_)?PERL\d*(PROG)?\@(\s|\Z)~) ||
+ ($firstline =~ m~^#!\s*xPERL_PATHx(\s|\Z)~)) {
+ return "perl";
+ }
+ if ($command =~ m/^python[0-9\.]*(\.exe)?$/i) {
+ return "python";
+ }
+ if ($command =~ m/^(tcl|tclsh|bltwish|wish|wishx|WISH)[0-9\.]*(\.exe)?$/i) {
+ return "tcl";
+ }
+ if ($command =~ m/^expectk?[0-9\.]*(\.exe)?$/i) { return "exp"; }
+ if ($command =~ m/^[ng]?awk[0-9\.]*(\.exe)?$/i) { return "awk"; }
+ if ($command =~ m/^sed$/i) { return "sed"; }
+ if ($command =~ m/^guile[0-9\.]*$/i) { return "lisp"; }
+ if ($firstline =~ m@^#!.*make\b@i) { # We'll claim that #! make is a makefile.
+ return "makefile";
+ }
+ if ($firstline =~ m@^#!\s*\.(\s|\Z)@) { # Lonely period.
+ return ""; # Ignore the first line, it's not helping.
+ }
+ if ($firstline =~ m@^#!\s*\Z@) { # Empty line.
+ return ""; # Ignore the first line, it's not helping.
+ }
+ if ($firstline =~ m@^#!\s*/dev/null@) { # /dev/null is the script?!?
+ return ""; # Ignore nonsense ("/dev/null").
+ }
+ if ($firstline =~ m@^#!\s*/unix(\s|\Z)@) {
+ return ""; # Ignore nonsense ("/unix").
+ }
+ if (($filename =~ m@\.pl$@) || ($filename =~ m@\.pm$@)) {
+ return ""; # Don't warn about files that will be ID'd as perl files.
+ }
+ if (($filename =~ m@\.sh$@)) {
+ return ""; # Don't warn about files that will be ID'd as sh files.
+ }
+ if ($firstline =~ m@^#!\s*\S@) {
+ $firstline =~ s/\n.*//s; # Delete everything after first line.
+ $warning_from_first_line = "WARNING! File $filename has unknown start: $firstline";
+ return "";
+ }
+ return "";
+}
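+
+# Illustrative sketch (hypothetical first lines, given as assumptions):
+#   #!/bin/sh                ->  "sh"
+#   #!/usr/bin/env python3   ->  "python"
+#   #!/usr/bin/wish -f       ->  "tcl"
+#   #!/opt/tools/perl5.8 -w  ->  "perl"
+# A binary file, or one with no "#!" line, simply falls through with "".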
+
+
+sub get_file_type {
+ my $file_to_examine = shift;
+ # Return the given file's type.
+ # Consider the file's contents, filename, and file extension.
+
+ $warning_from_first_line = "";
+
+ # Skip file names known to not be program files.
+ $basename = $file_to_examine;
+ $basename =~ s!^.*/!!;
+ if ($not_code_filenames{$basename}) {
+ print "Note: Skipping non-program filename: $file_to_examine\n"
+ if $noisy;
+ return "not";
+ }
+
+ # Skip "configure" files if there's a corresponding "configure.in"
+ # file; such a situation suggests that "configure" is automatically
+ # generated by "autoconf" from "configure.in".
+ if (($file_to_examine =~ m!/configure$!) &&
+ (-s "${file_to_examine}.in")) {
+ print "Note: Auto-generated configure file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+
+ if (($basename eq "lex.yy.c") || # Flex/Lex output!
+ ($basename eq "lex.yy.cc") || # Flex/Lex output - C++ scanner.
+ ($basename eq "y.code.c") || # yacc/bison output.
+ ($basename eq "y.tab.c") || # yacc output.
+ ($basename eq "y.tab.h")) { # yacc output.
+ print "Note: Auto-generated lex/yacc file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+
+ # Bison is more flexible than yacc -- it can create arbitrary
+ # .c/.h files. If we have a .tab.[ch] file, with a corresponding
+ # .y file, then it's been automatically generated.
+ # Bison can actually save to any filename, and of course a Makefile
+ # can rename any file, but we can't help that.
+ if ($basename =~ m/\.tab\.[ch]$/) {
+ $possible_bison = $file_to_examine;
+ $possible_bison =~ s/\.tab\.[ch]$/\.y/;
+ if (-s "$possible_bison") {
+ print "Note: found bison-generated file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+ }
+
+ # If there's a corresponding ".MASTER" file, treat this file
+ # as automatically-generated derivative. This handles "exmh".
+ if (-s "${file_to_examine}.MASTER") {
+ print "Note: Auto-generated non-.MASTER file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+
+ # Peek at first line to determine type. Note that the file contents
+ # take precedence over the filename extension, because there are files
+ # (such as /usr/src/redhat/BUILD/teTeX-1.0/texmf/doc/mkhtml.nawk)
+ # which have one extension (say, ".nawk") but actually contain
+ # something else (at least in part):
+ $type = &file_type_from_contents($file_to_examine);
+ if ($type) {
+ return $type;
+ }
+
+ # Use filename to determine if it's a makefile:
+ if (($file_to_examine =~ m/\bmakefile$/i) ||
+ ($file_to_examine =~ m/\bmakefile\.txt$/i) ||
+ ($file_to_examine =~ m/\bmakefile\.pc$/i) ||
+ ($file_to_examine =~ m/\bdebian\/rules$/i)) { # "debian/rules" too.
+ return "makefile";
+ }
+
+ # Try to use filename extension to determine type:
+ if ($file_to_examine =~ m/\.([^.\/]+)$/) {
+ $type = $1;
+
+ # More ugly problems: some source filenames use only
+ # UPPERCASE, and they can be mixed with regular files.
+ # Since filenames are normally lowercase or mixed case,
+ # presume that an all-uppercase filename means the extension
+ # should be treated as lowercase. This particularly affects
+ # .C, which usually means C++ but in this case would mean plain C.
+ my $uppercase_filename = 0;
+ if (($file_to_examine =~ m/[A-Z]/) &&
+ (! ($file_to_examine =~ m/[a-z]/))) {
+ $uppercase_filename = 1;
+ $type = lc($type); # Use lowercase version of type.
+ }
+
+ # Is this type known to NOT be a program?
+ if ($not_code_extensions{$type}) {
+ return "not";
+ }
+
+ # Handle weirdness: ".hpp" is a C/C++ header file, UNLESS it's
+ # makefile.hpp (a makefile); see /usr/src/redhat/BUILD,
+ # pine4.21/pine/makefile.hpp and pine4.21/pico/makefile.hpp
+ # Note that pine also includes pine4.21/pine/osdep/diskquot.hpp.
+ # Kaffe uses .hpp for C++ header files.
+ if (($type eq "hpp") && ($file_to_examine =~ m/makefile\.hpp$/i))
+ {return "makefile";}
+
+ # If it's a C file but there's a ".pc" or ".pgc" file, then presume that
+ # it was automatically generated:
+ if ($type eq "c") {
+ $pc_name = $file_to_examine;
+ if ($uppercase_filename) { $pc_name =~ s/\.C$/\.PC/; }
+ else { $pc_name =~ s/\.c$/\.pc/; }
+ if (-s "$pc_name" ) {
+ print "Note: Auto-generated C file (from .pc file) $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+ $pc_name = $file_to_examine;
+ if ($uppercase_filename) { $pc_name =~ s/\.C$/\.PGC/; }
+ else { $pc_name =~ s/\.c$/\.pgc/; }
+ if (-s "$pc_name" ) {
+ print "Note: Auto-generated C file (from .pgc file) $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+ }
+
+ # ".pc" is the official extension for Oracle C programs with
+ # Embedded C commands, but many programs use ".pc" to indicate
+ # the "PC" (MS-DOS/Windows) version of a file.
+ # We'll use heuristics to detect when it's not really C,
+ # otherwise claim it's C and move on.
+ if ($type eq "pc") { # If it has one of these filenames, it's not C.
+ if ($file_to_examine =~ m/\bmakefile\.pc$/i) { return "makefile"; }
+ if (($file_to_examine =~ m/\bREADME\.pc$/i) ||
+ ($file_to_examine =~ m/\binstall\.pc$/i) ||
+ ($file_to_examine =~ m/\bchanges\.pc$/i)) {return "not";}
+ else { return "c";}
+ }
+
+ if (defined($file_extensions{$type})) {
+ $type = $file_extensions{$type};
+ if ( (($type eq "exp") && (!&really_is_expect($file_to_examine))) ||
+ (($type eq "tk") && (!&really_is_expect($file_to_examine))) ||
+ (($type eq "objc") && (!&really_is_objc($file_to_examine))) ||
+ (($type eq "lex") && (!&really_is_lex($file_to_examine))) ||
+ (($type eq "pascal") && (!&really_is_pascal($file_to_examine)))) {
+ $type = "unknown";
+ } elsif ($type eq "inc") {
+ if (&really_is_php($file_to_examine)) {
+ $type = "php"; # Hey, the .inc is PHP!
+ } elsif (&really_is_incpascal($file_to_examine)) {
+ $type = "pascal";
+ } else {
+ $type = "unknown";
+ }
+ };
+ return $type;
+ }
+
+ }
+ # If we were expecting a script, warn about that.
+ if ($warning_from_first_line) {print "$warning_from_first_line\n";}
+ # Don't know what it is, so report "unknown".
+ return "unknown";
+}
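+
+# Worked example of the precedence above (hypothetical names, assumptions):
+# a "tools/build.nawk" whose first line is "#!/bin/awk -f" is recorded as
+# "awk" because contents beat the extension; a shebang-less "util.pc" falls
+# through to the extension rules, which pick "makefile", "not", or plain "c".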
+
+
+
+
+sub convert_h_files {
+ # Determine if the ".h" files we saw are C, OBJC, C++, or a mixture (!)
+ # Usually ".hpp" files are C++, but if we didn't see any C++ files then
+ # it probably isn't. This handles situations like pine; it has a file
+ # /usr/src/redhat/BUILD/pine4.21/pine/osdep/diskquot.hpp
+ # where the ".hpp" is for HP, not C++. (Of course, we completely miss
+ # the other files in that pine directory because they have truly bizarre
+ # extensions, but there's no easy way to handle such nonstandard things).
+
+ if (!defined($lang_list_files{"h"})) { return; }
+
+ my $saw_ansic = defined($lang_list_files{"ansic"});
+ my $saw_cpp = defined($lang_list_files{"cpp"});
+ my $saw_objc = defined($lang_list_files{"objc"});
+ my $confidence;
+
+ $lang_list_files{"h"}->close();
+
+ open(H_LIST, "<${dir}/h_list.dat") || die "Can't reopen h_list\n";
+
+ if ($saw_ansic && (!$saw_cpp) && (!$saw_objc)) {
+ # Only C, let's assume .h files are too
+ while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "c"); };
+ } elsif ($saw_cpp && (!$saw_ansic) && (!$saw_objc)) { # Only C++
+ while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "cpp"); };
+ } elsif ($saw_objc && (!$saw_ansic) && (!$saw_cpp)) { # Only Obj-C
+ while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "objc"); };
+ } else {
+ # Ugh, we have a mixture. Let's try to determine what we have, using
+ # various heuristics (looking for a matching name in the directory,
+ # reading the file contents, the contents in the directory, etc.)
+ # When all else fails, assume C.
+ while (defined($_=<H_LIST>)) {
+ chomp;
+ next if (!$_);
+ # print "DEBUG: H file $_\n";
+
+ $h_file = $_;
+ $cpp2_equivalent =
+ $cpp3_equivalent = $cpp4_equivalent = $objc_equivalent = $_;
+ $ansic_equivalent = $cpp_equivalent = $_;
+ $ansic_equivalent =~ s/h$/c/;
+ $cpp_equivalent =~ s/h$/C/;
+ $cpp2_equivalent =~ s/h$/cpp/;
+ $cpp3_equivalent =~ s/h$/cxx/;
+ $cpp4_equivalent =~ s/h$/cc/;
+ $objc_equivalent =~ s/h$/m/;
+ if (m!\.hpp$!) { force_record_file_type($h_file, "cpp"); }
+ elsif ( (-s $cpp2_equivalent) ||
+ (-s $cpp3_equivalent) || (-s $cpp4_equivalent))
+ { force_record_file_type($h_file, "cpp"); }
+ # Note: linuxconf has many ".m" files that match .h files,
+ # but the ".m" files are straight C and _NOT_ objective-C.
+ # The following test handles cases like this:
+ elsif ($saw_objc && (-s $objc_equivalent) &&
+ &really_is_objc($objc_equivalent))
+ { &force_record_file_type($h_file, "objc"); }
+ elsif (( -s $ansic_equivalent) && (! -s $cpp_equivalent))
+ { force_record_file_type($h_file, "c"); }
+ elsif ((-s $cpp_equivalent) && (! -s $ansic_equivalent))
+ { force_record_file_type($h_file, "cpp"); }
+ else {
+ $confidence = &looks_like_cpp($h_file);
+ if ($confidence == 2)
+ { &force_record_file_type($h_file, "cpp"); }
+ else {
+ $files_in_dir = &examine_dir($h_file);
+ if ($files_in_dir eq "cpp")
+ { &force_record_file_type($h_file, "cpp"); }
+ elsif ($files_in_dir eq "objc")
+ { &force_record_file_type($h_file, "objc"); }
+ elsif ($confidence == 1)
+ { &force_record_file_type($h_file, "cpp"); }
+ elsif ($h_file =~ m![a-z][0-9]*\.H$!)
+ # Mixed-case filename, .H extension.
+ { &force_record_file_type($h_file, "cpp"); }
+ else # We're clueless. Let's guess C.
+ { &force_record_file_type($h_file, "c"); };
+ }
+ }
+ }
+ } # Done handling ".h" files.
+ close(H_LIST);
+}
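+
+# Illustrative sketch (hypothetical layout, an assumption): in a mixed tree,
+# "gui/window.h" next to a non-empty "gui/window.cc" is recorded as "cpp",
+# while "core/list.h" with only "core/list.c" beside it is recorded as "c";
+# a header with no sibling source at all falls back to looks_like_cpp() and
+# then examine_dir().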
+
+
+# MAIN PROGRAM STARTS HERE.
+
+# Handle options.
+while (($#ARGV >= 0) && ($ARGV[0] =~ m/^--/)) {
+ $duplicates_okay = 1 if ($ARGV[0] =~ m/^--duplicates$/); # Count duplicates.
+ $crossdups_okay = 1 if ($ARGV[0] =~ m/^--crossdups$/); # Count crossdups.
+ $autogen_okay = 1 if ($ARGV[0] =~ m/^--autogen$/); # Count autogen.
+ $noisy = 1 if ($ARGV[0] =~ m/^--verbose$/); # Verbose output.
+ if ($ARGV[0] =~ m/^--duplistfile$/) { # File to get/record dups.
+ shift;
+ $duplistfile = $ARGV[0];
+ }
+ last if ($ARGV[0] =~ m/^--$/);
+ shift;
+}
+
+if ($#ARGV < 0) {
+ print "Error: No directory names given.\n";
+ exit(1);
+}
+
+if ($duplistfile) {
+ if (-e $duplistfile) {
+ open(DUPLIST, "<$duplistfile") || die "Can't open $duplistfile";
+ while (defined($_ = <DUPLIST>)) {
+ chomp;
+ ($digest, $filename) = split(/ /, $_, 2);
+ if (defined($digest) && defined($filename)) {
+ $previous_files{$digest} = $filename;
+ }
+ }
+ close(DUPLIST);
+ }
+ open(DUPLIST, ">>$duplistfile") || die "Can't open for writing $duplistfile";
+}
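+
+# Sketch of the on-disk format read and written above (inferred from the
+# split() call, so treat it as an assumption): one "digest filename" pair
+# per line, e.g.
+#   d41d8cd98f00b204e9800998ecf8427e src/foo.c
+# Blank separator lines (written below before each directory) are skipped
+# because split() then yields an undefined digest/filename pair.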
+
+
+while ( $dir = shift ) {
+
+ if (! -d "$dir") {
+ print "Skipping non-directory $dir\n";
+ next;
+ }
+
+ if ($examined_directories{$dir}) {
+ print "Skipping already-examined directory $dir\n";
+ next;
+ }
+ $examined_directories{$dir} = 1;
+
+ if (! open(FILELIST, "<${dir}/filelist")) {
+ print "Skipping directory $dir; it doesn't contain a file 'filelist'\n";
+ next;
+ }
+
+ if (-r "${dir}/all-physical.sloc") {
+ # Skip already-analyzed directories; if a directory has been analyzed,
+ # we've already broken it down.
+ next;
+ }
+
+ if ($crossdups_okay) { # Cross-dups okay; forget the hash of previous files.
+ %previous_files = ();
+ }
+
+ # Insert a blank line, in case we need to recover from a midway crash.
+ if ($duplistfile) {
+ print DUPLIST "\n";
+ }
+
+
+ $dup_count = 0;
+
+ while (defined($_ = <FILELIST>)) {
+ chomp;
+ $file = $_;
+ next if (!defined($file) || ($file eq ""));
+ if ($file =~ m/\n/) {
+ print STDERR "WARNING! File name contains embedded newline; it'll be IGNORED.\n";
+ print STDERR "Filename is: $file\n";
+ next;
+ }
+ $file_type = &get_file_type($file);
+ if ($file_type) {
+ &record_file_type($file, $file_type);
+ } else {
+ print STDERR "WARNING! No file type selected for $file\n";
+ }
+ }
+
+ # Done with straightline processing. Now we need to determine if
+ # the ".h" files we saw are C, OBJC, C++, or a mixture (!)
+ &convert_h_files();
+
+
+ # Done processing the directory. Close up shop so we're
+ # ready for the next directory.
+
+ close(FILELIST);
+ close_lang_lists();
+ reopen(""); # Close code file.
+
+ if ($dup_count > 50) {
+ print "Warning: in $dir, number of duplicates=$dup_count\n";
+ }
+
+}
+
+
diff --git a/break_filelist.orig b/break_filelist.orig
new file mode 100755
index 0000000..b34c702
--- /dev/null
+++ b/break_filelist.orig
@@ -0,0 +1,1084 @@
+#!/usr/bin/perl -w
+
+# break_filelist
+# Take a list of dirs which contain a "filelist";
+# creates files in each directory identifying which are C, C++, Perl, etc.
+# For example, "ansic.dat" lists all ANSI C files contained in filelist.
+# Note: ".h" files are ambiguous (they could be C or C++); the program
+# uses heuristics to determine this.
+# The list of .h files is also contained in h_list.dat.
+
+# (C) Copyright 2000-2001 David A. Wheeler
+# Part of "SLOCCount", and released under the GPL version 2;
+# see the documentation for details.
+
+# If adding a new language: add the logic to open the file,
+# close the file, and detect & write to the file listing that language.
+
+# Debatable decisions:
+# Doesn't count .dsl files (stylesheets, which are partially LISP).
+# Doesn't count .sql files (SQL queries & commands)
+
+# Note - I don't try to distinguish between TCL and [incr TCL] (itcl),
+# an OO extended version of TCL. For our purposes, it's all TCL.
+
+
+use FileHandle;
+
+
+# Set default configuration:
+
+$duplicates_okay = 0; # Set to 1 if you want to count file duplicates.
+$crossdups_okay = 0; # Set to 1 if duplicates okay in different filelists.
+$autogen_okay = 0; # Set to 1 if you want to count autogen'ed files.
+$noisy = 0; # Set to 1 if you want noisy reports.
+%lang_list_files = ();
+
+# The following extensions are NOT code:
+%not_code_extensions = (
+ "html" => 1,
+ "in" => 1, # Debatable.
+ "xpm" => 1,
+ "po" => 1,
+ "am" => 1, # Debatable.
+ "1" => 1, # Man pages (documentation):
+ "2" => 1,
+ "3" => 1,
+ "4" => 1,
+ "5" => 1,
+ "6" => 1,
+ "7" => 1,
+ "8" => 1,
+ "9" => 1,
+ "n" => 1,
+ "gif" => 1,
+ "tfm" => 1,
+ "png" => 1,
+ "m4" => 1, # Debatable.
+ "bdf" => 1,
+ "sgml" => 1,
+ "mf" => 1,
+ "txt" => 1,
+ "man" => 1,
+ "xbm" => 1,
+ "Tag" => 1,
+ "sgm" => 1,
+ "vf" => 1,
+ "tex" => 1,
+ "elc" => 1,
+ "gz" => 1,
+ "dic" => 1,
+ "pfb" => 1,
+ "fig" => 1,
+ "afm" => 1,
+ "jpg" => 1,
+ "bmp" => 1,
+ "htm" => 1,
+ "kdelnk" => 1,
+ "desktop" => 1,
+ "pbm" => 1,
+ "pdf" => 1,
+ "ps" => 1, # Postscript is _USUALLY_ generated automatically.
+ "eps" => 1,
+ "doc" => 1,
+ "man" => 1,
+ "o" => 1, # Object code is generated from source code.
+ "a" => 1, # Static object code.
+ "so" => 1, # Dynamically-loaded object code.
+);
+
+# The following filenames are NOT code:
+%not_code_filenames = (
+ "README" => 1,
+ "Readme" => 1,
+ "readme" => 1,
+ "README.tk" => 1, # used in kdemultimedia, it's confusing.
+ "Changelog" => 1,
+ "ChangeLog" => 1,
+ "Repository" => 1,
+ "CHANGES" => 1,
+ "Changes" => 1,
+ ".cvsignore" => 1,
+ "Root" => 1, # CVS.
+ "BUGS" => 1,
+ "TODO" => 1,
+ "COPYING" => 1,
+ "MAINTAINERS" => 1,
+ "Entries" => 1,
+ # Skip "iconfig.h" files; they're used in Imakefiles
+ # (used in xlockmore):
+ "iconfig.h" => 1,
+);
+
+
+# A filename ending in the following extensions usually maps to the
+# given language:
+
+%file_extensions = (
+ "c" => "ansic",
+ "ec" => "ansic", # Informix C.
+ "ecp" => "ansic", # Informix C.
+ "pgc" => "ansic", # Postgres embedded C/C++ (guess C)
+ "C" => "cpp", "cpp" => "cpp", "cxx" => "cpp", "cc" => "cpp",
+ "pcc" => "cpp", # Input to Oracle C++ preproc.
+ "m" => "objc",
+ "h" => "h", "H" => "h", "hpp" => "h",
+ "ada" => "ada", "adb" => "ada", "ads" => "ada",
+ "pad" => "ada", # Oracle Ada preprocessor.
+ "f" => "fortran",
+ "p" => "pascal",
+ "py" => "python",
+ "s" => "asm", "S" => "asm", "asm" => "asm",
+ "sh" => "sh", "bash" => "sh",
+ "csh" => "csh", "tcsh" => "csh",
+ "java" => "java",
+ "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "lsp" => "lisp",
+ "jl" => "lisp",
+ "tcl" => "tcl", "tk" => "tcl", "itk" => "tcl",
+ "exp" => "exp",
+ "pl" => "perl", "pm" => "perl", "perl" => "perl",
+ "awk" => "awk",
+ "sed" => "sed",
+ "y" => "yacc",
+ "l" => "lex",
+ "makefile" => "makefile",
+ "sql" => "sql",
+ "php" => "php", "php3" => "php", "php4" => "php", "php5" => "php",
+ "php6" => "php",
+ "inc" => "inc", # inc MAY be PHP - we'll handle it specially.
+ # ???: .pco is Oracle Cobol, need to add with a Cobol counter.
+);
+
+
+# GLOBAL VARIABLES
+
+$dup_count = 0;
+
+$warning_from_first_line = "";
+
+%examined_directories = (); # Keys = Names of directories examined this run.
+
+$duplistfile = "";
+
+###########
+
+
+# Handle re-opening individual CODE_FILEs.
+# CODE_FILE is public
+
+# Private value:
+$opened_file_name = "";
+
+sub reopen {
+ # Open file if it isn't already, else rewind.
+ # If filename is "", close any open file.
+ my $filename = shift;
+ chomp($filename);
+ # print("DEBUG: reopen($filename)\n");
+ if ($filename eq "") {
+ if ($opened_file_name) {close(CODE_FILE);}
+ $opened_file_name = "";
+ return;
+ }
+ if ($filename eq $opened_file_name) {
+ seek CODE_FILE, 0, 0; # Rewind.
+ } else { # We're opening a new file.
+ if ($opened_file_name) {close(CODE_FILE)}
+ open(CODE_FILE, "<$filename") || die "Can't open $filename";
+ $opened_file_name = $filename;
+ }
+}
+
+###########
+
+sub looks_like_cpp {
+ # returns a confidence level - does the file looks like it's C++?
+ my $filename = shift;
+ my $confidence = 0;
+ chomp($filename);
+ open( SUSPECT, "<$filename");
+ while (<SUSPECT>) {
+ if (m/^\s*class\b.*\{/) { # "}"
+ close(SUSPECT);
+ return 2;
+ }
+ if (m/^\s*class\b/) {
+ $confidence = 1;
+ }
+ }
+ close(SUSPECT);
+ return $confidence;
+}
+
+
+# Cache which files are objective-C or not.
+# Key is the full file pathname; value is 1 if objective-C (else 0).
+%objective_c_files = ();
+
+sub really_is_objc {
+# Given filename, returns TRUE if its contents really are objective-C.
+ my $filename = shift;
+ chomp($filename);
+
+ my $is_objc = 0; # Value to determine.
+ my $brace_lines = 0; # Lines that begin/end with curly braces.
+ my $plus_minus = 0; # Lines that begin with + or -.
+ my $word_main = 0; # Did we find "main("?
+ my $special = 0; # Did we find a special Objective-C pattern?
+
+ # Return cached result, if available:
+ if ($objective_c_files{$filename}) { return $objective_c_files{$filename};}
+
+ open(OBJC_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's objective C.\n";
+ while(<OBJC_FILE>) {
+
+ if (m/^\s*[{}]/ || m/[{}];?\s*$/) { $brace_lines++;}
+ if (m/^\s*[+-]/) {$plus_minus++;}
+ if (m/\bmain\s*\(/) {$word_main++;} # "main" followed by "("?
+ # Handle /usr/src/redhat/BUILD/egcs-1.1.2/gcc/objc/linking.m:
+ if (m/^\s*\[object name\];\s*$/i) {$special=1;}
+ }
+ close(OBJC_FILE);
+
+ if (($brace_lines > 1) && (($plus_minus > 1) || $word_main || $special))
+ {$is_objc = 1;}
+
+ $objective_c_files{$filename} = $is_objc; # Store result in cache.
+
+ return $is_objc;
+}
+
+
+# Cache which files are lex or not.
+# Key is the full file pathname; value is 1 if lex (else 0).
+%lex_files = ();
+
+sub really_is_lex {
+# Given filename, returns TRUE if its contents really is lex.
+# lex file must have "%%", "%{", and "%}".
+# In theory, a lex file doesn't need "%{" and "%}", but in practice
+# they all have them, and requiring them avoid mislabeling a
+# non-lexfile as a lex file.
+
+ my $filename = shift;
+ chomp($filename);
+
+ my $is_lex = 0; # Value to determine.
+ my $percent_percent = 0;
+ my $percent_opencurly = 0;
+ my $percent_closecurly = 0;
+
+ # Return cached result, if available:
+ if ($lex_files{$filename}) { return $lex_files{$filename};}
+
+ open(LEX_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's lex.\n";
+ while(<LEX_FILE>) {
+ $percent_percent++ if (m/^\s*\%\%/);
+ $percent_opencurly++ if (m/^\s*\%\{/);
+ $percent_closecurly++ if (m/^\s*\%\}/);
+ }
+ close(LEX_FILE);
+
+ if ($percent_percent && $percent_opencurly && $percent_closecurly)
+ {$is_lex = 1;}
+
+ $lex_files{$filename} = $is_lex; # Store result in cache.
+
+ return $is_lex;
+}
+
+
+# Cache which files are expect or not.
+# Key is the full file pathname; value is 1 if it is (else 0).
+%expect_files = ();
+
+sub really_is_expect {
+# Given filename, returns TRUE if its contents really are Expect.
+# Many "exp" files (such as in Apache and Mesa) are just "export" data,
+# summarizing something else # (e.g., its interface).
+# Sometimes (like in RPM) it's just misc. data.
+# Thus, we need to look at the file to determine
+# if it's really an "expect" file.
+
+ my $filename = shift;
+ chomp($filename);
+
+# The heuristic is as follows: it's Expect _IF_ it:
+# 1. has "load_lib" command and either "#" comments or {}.
+# 2. {, }, and one of: proc, if, [...], expect
+
+ my $is_expect = 0; # Value to determine.
+
+ my $begin_brace = 0; # Lines that begin with curly braces.
+ my $end_brace = 0; # Lines that begin with curly braces.
+ my $load_lib = 0; # Lines with the Load_lib command.
+ my $found_proc = 0;
+ my $found_if = 0;
+ my $found_brackets = 0;
+ my $found_expect = 0;
+ my $found_pound = 0;
+
+ # Return cached result, if available:
+ if ($expect_files{$filename}) { return expect_files{$filename};}
+
+ open(EXPECT_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's expect.\n";
+ while(<EXPECT_FILE>) {
+
+ if (m/#/) {$found_pound++; s/#.*//;}
+ if (m/^\s*\{/) { $begin_brace++;}
+ if (m/\{\s*$/) { $begin_brace++;}
+ if (m/^\s*\}/) { $end_brace++;}
+ if (m/\};?\s*$/) { $end_brace++;}
+ if (m/^\s*load_lib\s+\S/) { $load_lib++;}
+ if (m/^\s*proc\s/) { $found_proc++;}
+ if (m/^\s*if\s/) { $found_if++;}
+ if (m/\[.*\]/) { $found_brackets++;}
+ if (m/^\s*expect\s/) { $found_expect++;}
+ }
+ close(EXPECT_FILE);
+
+ if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
+ {$is_expect = 1;}
+ if ( $begin_brace && $end_brace &&
+ ($found_proc || $found_if || $found_brackets || $found_expect))
+ {$is_expect = 1;}
+
+ $expect_files{$filename} = $is_expect; # Store result in cache.
+
+ return $is_expect;
+}
+
+
+# Cached values.
+%pascal_files = ();
+
+sub really_is_pascal {
+# Given filename, returns TRUE if its contents really are Pascal.
+
+# This isn't as obvious as it seems.
+# Many ".p" files are Perl files
+# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
+# others are C extractions
+# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
+# and some files in linuxconf).
+# However, test files in "p2c" really are Pascal, for example.
+
+# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
+# is actually C code. The heuristics determine that they're not Pascal,
+# but because it ends in ".p" it's not counted as C code either.
+# I believe this is actually correct behavior, because frankly it
+# looks like it's automatically generated (it's a bitmap expressed as code).
+# Rather than guess otherwise, we don't include it in a list of
+# source files. Let's face it, someone who creates C files ending in ".p"
+# and expects them to be counted by default as C files in SLOCCount needs
+# their head examined. I suggest examining their head
+# with a sucker rod (see syslogd(8) for more on sucker rods).
+
+# This heuristic counts as Pascal such files such as:
+# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
+# Which is hand-generated. We don't count woven documents now anyway,
+# so this is justifiable.
+
+ my $filename = shift;
+ chomp($filename);
+
+# The heuristic is as follows: it's Pascal _IF_ it has all of the following:
+# 1. "^..program NAME(...);" or "..unit NAME".
+# 2. "procedure", "function", "^..interface", or "^..implementation"
+# 3. a "begin", and
+# 4. it ends with "end." (ignoring {...} comments).
+# The last requirement in particular filters out non-Pascal.
+
+
+ my $is_pascal = 0; # Value to determine.
+
+ my $has_program = 0;
+ my $has_unit = 0;
+ my $has_procedure_or_function = 0;
+ my $found_begin = 0;
+ my $found_terminating_end = 0;
+
+ # Return cached result, if available:
+ if ($pascal_files{$filename}) { return pascal_files{$filename};}
+
+ open(PASCAL_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's pascal.\n";
+ while(<PASCAL_FILE>) {
+ if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;}
+ if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;}
+ if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
+ if (m/\bfunction\b/i) { $has_procedure_or_function = 1; }
+ if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; }
+ if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
+ if (m/\bbegin\b/i) { $has_begin = 1; }
+ s/\{.*?\}//g; # Ignore comments on this line; imperfect, but effective.
+ # This heuristic fails if there are multi-line comments after
+ # "end."; I haven't seen that in real Pascal programs:
+ if (m/end\.\s*$/i) {$found_terminating_end = 1;}
+ elsif (m/\S/) {$found_terminating_end = 0;}
+ }
+ close(PASCAL_FILE);
+
+ # Okay, we've examined the entire file looking for clues;
+ # let's use those clues to determine if it's really Pascal:
+
+ if ( ($has_unit || $has_program) && $has_procedure_or_function &&
+ $has_begin && $found_terminating_end)
+ {$is_pascal = 1;}
+
+ $pascal_files{$filename} = $is_pascal; # Store result in cache.
+
+ return $is_pascal;
+}
+
+# Cache which files are php or not.
+# Key is the full file pathname; value is 1 if it is (else 0).
+%php_files = ();
+
+sub really_is_php {
+# Given filename, returns TRUE if its contents really is php.
+
+ my $filename = shift;
+ chomp($filename);
+
+ my $is_php = 0; # Value to determine.
+ # Need to find a matching pair of surrounds, with ending after beginning:
+ my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?>
+ my $script_surround = 0; # <script..>; bit 0 = <script language="php">
+ my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %>
+
+ # Return cached result, if available:
+ if ($php_files{$filename}) { return $php_files{$filename};}
+
+ open(PHP_FILE, "<$filename") ||
+ die "Can't open $filename to determine if it's php.\n";
+ while(<PHP_FILE>) {
+ if (m/\<\?/) { $normal_surround |= 1; }
+ if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
+ if (m/\<script.*language="?php"?/i) { $script_surround |= 1; }
+ if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
+ if (m/\<\%/) { $asp_surround |= 1; }
+ if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
+ }
+ close(PHP_FILE);
+
+ if ( ($normal_surround == 3) || ($script_surround == 3) ||
+ ($asp_surround == 3)) {
+ $is_php = 1;
+ }
+
+ $php_files{$filename} = $is_php; # Store result in cache.
+
+ return $is_php;
+}
+
+
+
+sub examine_dir {
+ # Given a file, determine if there are only C++, OBJC, C, or a mixture
+ # in the same directory. Returns "ansic", "cpp", "objc" or "mix"
+ my $filename = shift;
+ chomp($filename);
+ my $dirname = $filename;
+ $dirname =~ s/\/[^\/]*$//;
+ my $saw_ansic_in_dir = 0;
+ my $saw_pc_in_dir = 0; # ".pc" may mean Oracle C.
+ my $saw_pcc_in_dir = 0; # ".pc" may mean Oracle C++.
+ my $saw_cpp_in_dir = 0;
+ my $saw_objc_in_dir = 0;
+ opendir(DIR, $dirname) || die "can't opendir $dirname";
+ while ($_ = readdir(DIR)) {
+ chomp;
+ next if (!$_);
+ if (m/\.(cpp|C|cxx|cc)$/ && -f "$dirname/$_") {$saw_cpp_in_dir = 1;}
+ if (m/\.c$/ && -f "$dirname/$_") {$saw_ansic_in_dir = 1;}
+ if (m/\.pc$/ && -f "$dirname/$_") {$saw_pc_in_dir = 1;}
+ if (m/\.pcc$/ && -f "$dirname/$_") {$saw_pcc_in_dir = 1;}
+ if (m/\.m$/ && -f "$dirname/$_" && &really_is_objc($dirname . "/" . $_))
+ {$saw_objc_in_dir = 1;}
+ if (($saw_ansic_in_dir + $saw_cpp_in_dir + $saw_objc_in_dir) > 1) {
+ closedir(DIR);
+ return "mix";
+ }
+ }
+ # Done searching; we saw at most one type.
+ if ($saw_ansic_in_dir) {return "c";}
+ elsif ($saw_cpp_in_dir) {return "cpp";}
+ elsif ($saw_objc_in_dir) {return "objc";}
+ elsif ($saw_pc_in_dir && (!$saw_pcc_in_dir)) {return "c";} # Guess "C".
+ elsif ($saw_pcc_in_dir && (!$saw_pc_in_dir)) {return "cpp";} # Guess "C".
+ else {return "mix";} # We didn't see anything... so let's say "mix".
+}
+
+sub was_generated_automatically() {
+ # Determine if the file was generated automatically.
+ # Use a simple heuristic: check if first few lines have the
+ # phrase "generated automatically", or "automatically generated",
+ # or "do not edit" as the first
+ # words in the line (after possible comment markers and spaces).
+ my $filename = shift;
+
+ if ($autogen_okay) {return 0;};
+
+ chomp($filename);
+ reopen($filename);
+ $i = 15; # Look at first 15 lines.
+ while (<CODE_FILE>) {
+ if (m/^[\s#\/\*;\-\%]*generated automatically/i ||
+ m/^[\s#\/\*;\-\%]*automatically generated/i ||
+ m/^[\s#\/\*;\-\%]*this is a generated file/i || # TeTex uses this.
+ m/^[\s#\/\*;\-\%]*generated with the.*utility/i || # TeTex uses this.
+ m/^[\s#\/\*;\-\%]*do not edit/i) {
+ return 1;
+ }
+ $i--;
+ last if $i <= 0;
+ }
+ return 0;
+}
+
+
+# Previous files added, indexed by digest:
+
+%previous_files = ();
+
+$cached_digest = "";
+$cached_digest_filename = "";
+
+sub get_digest {
+ my $filename = shift;
+ # First, check the cache -- did we just compute this?
+ if ($filename eq $cached_digest_filename) {
+ return $cached_digest; # We did, so here's what it was.
+ }
+
+ my $results = `md5sum "$filename"`;
+ chomp($results);
+ $results =~ s/^\s*//; # Not needed for GNU Textutils.
+ $results =~ s/[^a-fA-F0-9].*//; # Strip away end.
+ $cached_digest = $results; # Store in cache.
+ $cached_digest_filename = $filename;
+ return $results;
+}
+
+
+sub already_added {
+ # returns the first file's name with the same contents,
+ # else returns the empty string.
+
+ my $filename = shift;
+ my $digest = &get_digest($filename);
+
+ if ($previous_files{$digest}) {
+ return $previous_files{$digest};
+ } else {
+ return "";
+ }
+}
+
+sub close_lang_lists {
+ my $lang;
+ my $file;
+ while (($lang, $file) = each(%lang_list_files)) {
+ $file->close(); # Ignore any errors on close, there's little we can do.
+ }
+ %lang_list_files = ();
+}
+
+sub force_record_file_type {
+ my ($filename, $type) = @_;
+
+ if (!$type) {die "ERROR! File $filename, type $file_type\n";}
+ if ($type eq "c") {$type = "ansic";};
+ if (!defined($lang_list_files{$type})) {
+ $lang_list_files{$type} = new FileHandle("${dir}/${type}_list.dat", "w") ||
+ die "Could not open ${dir}/${type}_list.dat";
+ }
+ $lang_list_files{$type}->printf("%s\n", $filename);
+}
+
+
+sub record_file_type {
+ my ($filename, $type) = @_;
+ # First check if the file should be auto, dup, or zero - and add there
+ # if so. Otherwise, add to record of 'type'.
+
+ my $first_filename;
+
+ if (-z $filename) {
+ force_record_file_type($filename, "zero");
+ return;
+ }
+
+ if (&was_generated_automatically($filename)) {
+ force_record_file_type($filename, "auto");
+ return;
+ }
+
+ unless (($duplicates_okay) || ($type eq "not") || ($type eq "unknown")) {
+ $first_filename = &already_added($filename);
+ if ($first_filename) {
+ print "Note: $filename dups $first_filename\n" if $noisy;
+ force_record_file_type("$filename dups $first_filename", "dup");
+ $dup_count++;
+ return;
+ } else { # This isn't a duplicate - record that info, as needed.
+ my $digest = &get_digest($filename);
+ $previous_files{$digest} = $filename;
+ if ($duplistfile) {
+ print DUPLIST "$digest $filename\n";
+ }
+ }
+ }
+
+ force_record_file_type($filename, $type);
+}
+
+
+
+sub file_type_from_contents() {
+ # Determine if file type is a scripting language, and if so, return it.
+ # Returns its type as a string, or the empty string if it's undetermined.
+ my $filename = shift;
+ my $command;
+ chomp($filename);
+ reopen($filename);
+ # Don't do $firstline = <CODE_FILE> here because the file may be binary;
+ # instead, read in a fixed number of bytes:
+ read CODE_FILE, $firstline, 200;
+ return "" if (!$_);
+ chomp($firstline);
+ if (!$_) {return "";}
+ if (!$firstline) {return "";}
+
+ # Handle weirdness: If there's a ".cpp" file beginning with .\"
+ # then it clearly isn't C/C++... it's a man page. People who create
+ # and distribute man pages with such filename extensions should have
+ # a fingernail removed, slowly :-).
+ if (($firstline =~ m@^[,.]\\"@) &&
+ $filename =~ m@\.(c|cpp|C|cxx|cc)$@) {return "not";}
+
+
+ if (!($firstline =~ m@^#!@)) {return "";} # No script indicator here.
+
+ # studying $firstline doesn't speed things up, unfortunately.
+
+ # I once used a pattern that only acknowledged very specific directories,
+ # but I found that many test cases use unusual script locations
+ # (to ensure that they're invoking the correct program they're testing).
+ # Thus, we depend on the program being named with postfixed whitespace,
+ # and either begin named by itself or with a series of lowercase
+ # directories ending in "/".
+
+ # I developed these patterns by starting with patterns that appeared
+ # correct, and then examined the output (esp. warning messages) to see
+ # what I'd missed.
+
+ $command = "";
+ if ($firstline =~ m@^#!\s*/(usr/)?bin/env\s+([a-zA-Z0-9\._]+)(\s|\Z)@) {
+ $command = $2;
+ } elsif ($firstline =~ m@^#!\s*([a-zA-Z0-9\/\.]+\/)?([a-zA-Z0-9\._]+)(\s|\Z)@) {
+ $command = $2;
+ }
+
+ if ( ($command =~ m/^(bash|ksh|zsh|pdksh|sh)[0-9\.]*(\.exe)?$/) ||
+ ($firstline =~
+ m~^#!\s*\@_?(SCRIPT_)?(PATH_)?(BA|K)?SH(ELL)?(\d+)?\@?(\s|\Z)~)) {
+ # Note: wish(1) uses a funny trick; see wish(1) for more info.
+ # The following code detects this unusual wish convention.
+ if ($firstline =~ m@exec wish(\s|\Z)@) {
+ return "tcl"; # return the type for wish.
+ }
+ # Otherwise, it's shell.
+ return "sh";
+ }
+ if ( ($command =~ m/^(t?csh\d*)[0-9\.]*(\.exe)?$/) ||
+ ($firstline =~ m@^#!\s*xCSH_PATHx(\s|\Z)@)) {
+ return "csh";
+ }
+ if ( ($command =~ m/^(mini)?perl[0-9\.]*(\.exe)?$/) ||
+ ($firstline =~ m~^#!\s*\@_?(PATH_)?PERL\d*(PROG)?\@(\s|\Z)~) ||
+ ($firstline =~ m~^#!\s*xPERL_PATHx(\s|\Z)~)) {
+ return "perl";
+ }
+ if ($command =~ m/^python[0-9\.]*(\.exe)?$/) {
+ return "python";
+ }
+ if ($command =~ m/^(tcl|tclsh|bltwish|wish|wishx|WISH)[0-9\.]*(\.exe)?$/) {
+ return "tcl";
+ }
+ if ($command =~ m/^expectk?[0-9\.]*(\.exe)?$/) { return "exp"; }
+ if ($command =~ m/^[ng]?awk[0-9\.]*(\.exe)?$/) { return "awk"; }
+ if ($command =~ m/^sed$/) { return "sed"; }
+ if ($command =~ m/^guile[0-9\.]*$/) { return "lisp"; }
+ if ($firstline =~ m@^#!.*make\b@) { # We'll claim that #! make is a makefile.
+ return "makefile";
+ }
+ if ($firstline =~ m@^#!\s*\.(\s|\Z)@) { # Lonely period.
+ return ""; # Ignore the first line, it's not helping.
+ }
+ if ($firstline =~ m@^#!\s*\Z@) { # Empty line.
+ return ""; # Ignore the first line, it's not helping.
+ }
+ if ($firstline =~ m@^#!\s*/dev/null@) { # /dev/null is the script?!?
+ return ""; # Ignore nonsense ("/dev/null").
+ }
+ if ($firstline =~ m@^#!\s*/unix(\s|Z)@) {
+ return ""; # Ignore nonsense ("/unix").
+ }
+ if (($filename =~ m@\.pl$@) || ($filename =~ m@\.pm$@)) {
+ return ""; # Don't warn about files that will be ID'd as perl files.
+ }
+ if (($filename =~ m@\.sh$@)) {
+ return ""; # Don't warn about files that will be ID'd as sh files.
+ }
+ if ($firstline =~ m@^#!\s*\S@) {
+ $firstline =~ s/\n.*//s; # Delete everything after first line.
+ $warning_from_first_line = "WARNING! File $filename has unknown start: $firstline";
+ return "";
+ }
+ return "";
+}
+
+
+sub get_file_type {
+ my $file_to_examine = shift;
+ # Return the given file's type.
+ # It looks at the contents, then the filename, then file extension.
+
+ $warning_from_first_line = "";
+
+ # Skip file names known to not be program files.
+ $basename = $file_to_examine;
+ $basename =~ s!^.*/!!;
+ if ($not_code_filenames{$basename}) {
+ print "Note: Skipping non-program filename: $file_to_examine\n"
+ if $noisy;
+ return "not";
+ }
+
+ # Skip "configure" files if there's a corresponding "configure.in"
+ # file; such a situation suggests that "configure" is automatically
+ # generated by "autoconf" from "configure.in".
+ if (($file_to_examine =~ m!/configure$!) &&
+ (-s "${file_to_examine}.in")) {
+ print "Note: Auto-generated configure file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+
+ if (($basename eq "lex.yy.c") || # Flex/Lex output!
+ ($basename eq "lex.yy.cc") || # Flex/Lex output - C++ scanner.
+ ($basename eq "y.code.c") || # yacc/bison output.
+ ($basename eq "y.tab.c") || # yacc output.
+ ($basename eq "y.tab.h")) { # yacc output.
+ print "Note: Auto-generated lex/yacc file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+
+ # Bison is more flexible than yacc -- it can create arbitrary
+ # .c/.h files. If we have a .tab.[ch] file, with a corresponding
+ # .y file, then it's been automatically generated.
+ # Bison can actually save to any filename, and of course a Makefile
+ # can rename any file, but we can't help that.
+ if ($basename =~ m/\.tab\.[ch]$/) {
+ $possible_bison = $file_to_examine;
+ $possible_bison =~ s/\.tab\.[ch]$/\.y/;
+ if (-s "$possible_bison") {
+ print "Note: found bison-generated file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+ }
+
+ # If there's a corresponding ".MASTER" file, treat this file
+ # as automatically-generated derivative. This handles "exmh".
+ if (-s "${file_to_examine}.MASTER") {
+ print "Note: Auto-generated non-.MASTER file $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+
+ # Peek at first line to determine type. Note that the file contents
+ # take precedence over the filename extension, because there are files
+ # (such as /usr/src/redhat/BUILD/teTeX-1.0/texmf/doc/mkhtml.nawk)
+ # which have one extension (say, ".nawk") but actually contain
+ # something else (at least in part):
+ $type = &file_type_from_contents($file_to_examine);
+ if ($type) {
+ return $type;
+ }
+
+ # Use filename to determine if it's a makefile:
+ if (($file_to_examine =~ m/\bmakefile$/i) ||
+ ($file_to_examine =~ m/\bmakefile\.txt$/i) ||
+ ($file_to_examine =~ m/\bmakefile\.pc$/i)) {
+ return "makefile";
+ }
+
+ # Try to use filename extension to determine type:
+ if ($file_to_examine =~ m/\.([^.\/]+)$/) {
+ $type = $1;
+
+ # Is this type known to NOT be a program?
+ if ($not_code_extensions{$type}) {
+ return "not";
+ }
+
+ # Handle weirdness: ".hpp" is a C/C++ header file, UNLESS it's
+ # makefile.hpp (a makefile); see /usr/src/redhat/BUILD,
+ # pine4.21/pine/makefile.hpp and pine4.21/pico/makefile.hpp
+ # Note that pine also includes pine4.21/pine/osdep/diskquot.hpp.
+ # Kaffe uses .hpp for C++ header files.
+ if (($type eq "hpp") && ($file_to_examine =~ m/makefile\.hpp$/))
+ {return "makefile";}
+
+ # If it's a C file but there's a ".pc" or ".pgc" file, then presume that
+ # it was automatically generated:
+ if ($type eq "c") {
+ $pc_name = $file_to_examine;
+ $pc_name =~ s/\.c$/\.pc/;
+ if (-s "$pc_name" ) {
+ print "Note: Auto-generated C file (from .pc file) $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+ $pc_name = $file_to_examine;
+ $pc_name =~ s/\.c$/\.pgc/;
+ if (-s "$pc_name" ) {
+ print "Note: Auto-generated C file (from .pgc file) $file_to_examine\n"
+ if $noisy;
+ return "auto";
+ }
+ }
+
+ # ".pc" is the official extension for Oracle C programs with
+ # Embedded C commands, but many programs use ".pc" to indicate
+ # the "PC" (MS-DOS/Windows) version of a file.
+ # We'll use heuristics to detect when it's not really C,
+ # otherwise claim it's C and move on.
+ if ($type eq "pc") { # If it has one of these filenames, it's not C.
+ if ($file_to_examine =~ m/\bmakefile\.pc$/i) { return "makefile"; }
+ if (($file_to_examine =~ m/\bREADME\.pc$/i) ||
+ ($file_to_examine =~ m/\binstall\.pc$/i) ||
+ ($file_to_examine =~ m/\bchanges\.pc$/i)) {return "not";}
+ else { return "c";}
+ }
+
+ if (defined($file_extensions{$type})) {
+ $type = $file_extensions{$type};
+ if ( (($type eq "exp") && (!&really_is_expect($file_to_examine))) ||
+ (($type eq "tk") && (!&really_is_expect($file_to_examine))) ||
+ (($type eq "objc") && (!&really_is_objc($file_to_examine))) ||
+ (($type eq "lex") && (!&really_is_lex($file_to_examine))) ||
+ (($type eq "pascal") && (!&really_is_pascal($file_to_examine))) ||
+ (($type eq "inc") && (!&really_is_php($file_to_examine))))
+ {$type = "unknown";}
+ if ($type eq "inc") { $type = "php"; }; # Hey, the .inc is PHP!
+ return $type;
+ }
+
+ }
+ # If we were expecting a script, warn about that.
+ if ($warning_from_first_line) {print "$warning_from_first_line\n";}
+ # Don't know what it is, so report "unknown".
+ return "unknown";
+}
+
+
+
+
+sub convert_h_files {
+ # Determine if the ".h" files we saw are C, OBJC, C++, or a mixture (!)
+ # Usually ".hpp" files are C++, but if we didn't see any C++ files then
+ # it probably isn't. This handles situations like pine; it has a file
+ # /usr/src/redhat/BUILD/pine4.21/pine/osdep/diskquot.hpp
+ # where the ".hpp" is for HP, not C++. (Of course, we completely miss
+ # the other files in that pine directory because they have truly bizarre
+ # extensions, but there's no easy way to handle such nonstandard things).
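+ # For example (hypothetical filenames): if a package contains only .c sources,
+ # every foo.h is recorded as C; a foo.h next to a foo.cpp, foo.cxx, or foo.cc
+ # is recorded as C++; a foo.h next to a foo.m that really is Objective-C is
+ # recorded as Objective-C; otherwise we fall back to looks_like_cpp(),
+ # examine_dir(), and finally default to C.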
+
+ if (!defined($lang_list_files{"h"})) { return; }
+
+ my $saw_ansic = defined($lang_list_files{"ansic"});
+ my $saw_cpp = defined($lang_list_files{"cpp"});
+ my $saw_objc = defined($lang_list_files{"objc"});
+ my $confidence;
+
+ $lang_list_files{"h"}->close();
+
+ open(H_LIST, "<${dir}/h_list.dat") || die "Can't reopen h_list\n";
+
+ if ($saw_ansic && (!$saw_cpp) && (!$saw_objc)) {
+ # Only C, let's assume .h files are too
+ while (<H_LIST>) { chomp; force_record_file_type($_, "c"); };
+ } elsif ($saw_cpp && (!$saw_ansic) && (!$saw_objc)) { # Only C++
+ while (<H_LIST>) { chomp; force_record_file_type($_, "cpp"); };
+ } elsif ($saw_objc && (!$saw_ansic) && (!$saw_cpp)) { # Only Obj-C
+ while (<H_LIST>) { chomp; force_record_file_type($_, "objc"); };
+ } else {
+ # Ugh, we have a mixture. Let's try to determine what we have, using
+ # various heuristics (looking for a matching name in the directory,
+ # reading the file contents, the contents in the directory, etc.)
+ # When all else fails, assume C.
+ while (<H_LIST>) {
+ chomp;
+ next if (!$_);
+ # print "DEBUG: H file $_\n";
+
+ $h_file = $_;
+ $cpp2_equivalent =
+ $cpp3_equivalent = $cpp4_equivalent = $objc_equivalent = $_;
+ $ansic_equivalent = $cpp_equivalent = $_;
+ $ansic_equivalent =~ s/h$/c/;
+ $cpp_equivalent =~ s/h$/C/;
+ $cpp2_equivalent =~ s/h$/cpp/;
+ $cpp3_equivalent =~ s/h$/cxx/;
+ $cpp4_equivalent =~ s/h$/cc/;
+ $objc_equivalent =~ s/h$/m/;
+ if (m!\.hpp$!) { force_record_file_type($h_file, "cpp"); }
+ elsif ( (-s $cpp2_equivalent) ||
+ (-s $cpp3_equivalent) || (-s $cpp4_equivalent))
+ { force_record_file_type($h_file, "cpp"); }
+ # Note: linuxconf has many ".m" files that match .h files,
+ # but the ".m" files are straight C and _NOT_ objective-C.
+ # The following test handles cases like this:
+ elsif ($saw_objc && (-s $objc_equivalent) &&
+ &really_is_objc($objc_equivalent))
+ { &force_record_file_type($h_file, "objc"); }
+ elsif (( -s $ansic_equivalent) && (! -s $cpp_equivalent))
+ { force_record_file_type($h_file, "c"); }
+ elsif ((-s $cpp_equivalent) && (! -s $ansic_equivalent))
+ { force_record_file_type($h_file, "cpp"); }
+ else {
+ $confidence = &looks_like_cpp($h_file);
+ if ($confidence == 2)
+ { &force_record_file_type($h_file, "cpp"); }
+ else {
+ $files_in_dir = &examine_dir($h_file);
+ if ($files_in_dir eq "cpp")
+ { &force_record_file_type($h_file, "cpp"); }
+ elsif ($files_in_dir eq "objc")
+ { &force_record_file_type($h_file, "objc"); }
+ elsif ($confidence == 1)
+ { &force_record_file_type($h_file, "cpp"); }
+ elsif ($h_file =~ m![a-z][0-9]*\.H$!)
+ # Mixed-case filename, .H extension.
+ { &force_record_file_type($h_file, "cpp"); }
+ else # We're clueless. Let's guess C.
+ { &force_record_file_type($h_file, "c"); };
+ }
+ }
+ }
+ } # Done handling ".h" files.
+ close(H_LIST);
+}
+
+
+# MAIN PROGRAM STARTS HERE.
+
+# Handle options.
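+# A hypothetical invocation (assuming this script is installed as break_filelist;
+# the option values and directory paths below are only examples):
+#   break_filelist --verbose --duplistfile dups.dat ~/.slocdata/pkg1 ~/.slocdata/pkg2
+# Each directory argument must contain a "filelist" file naming the files to classify.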
+while (($#ARGV >= 0) && ($ARGV[0] =~ m/^--/)) {
+ $duplicates_okay = 1 if ($ARGV[0] =~ m/^--duplicates$/); # Count duplicates.
+ $crossdups_okay = 1 if ($ARGV[0] =~ m/^--crossdups$/); # Count crossdups.
+ $autogen_okay = 1 if ($ARGV[0] =~ m/^--autogen$/); # Count autogen.
+ $noisy = 1 if ($ARGV[0] =~ m/^--verbose$/); # Verbose output.
+ if ($ARGV[0] =~ m/^--duplistfile$/) { # File to get/record dups.
+ shift;
+ $duplistfile = $ARGV[0];
+ }
+ last if ($ARGV[0] =~ m/^--$/);
+ shift;
+}
+
+if ($#ARGV < 0) {
+ print "Error: No directory names given.\n";
+ exit(1);
+}
+
+if ($duplistfile) {
+ if (-e $duplistfile) {
+ open(DUPLIST, "<$duplistfile") || die "Can't open $duplistfile";
+ while (<DUPLIST>) {
+ chomp;
+ ($digest, $filename) = split(/ /, $_, 2);
+ if (defined($digest) && defined($filename)) {
+ $previous_files{$digest} = $filename;
+ }
+ }
+ close(DUPLIST);
+ }
+ open(DUPLIST, ">>$duplistfile") || die "Can't open for writing $duplistfile";
+}
+
+
+while ( $dir = shift ) {
+
+ if (! -d "$dir") {
+ print "Skipping non-directory $dir\n";
+ next;
+ }
+
+ if ($examined_directories{$dir}) {
+ print "Skipping already-examined directory $dir\n";
+ next;
+ }
+ $examined_directories{$dir} = 1;
+
+ if (! open(FILELIST, "<${dir}/filelist")) {
+ print "Skipping directory $dir; it doesn't contain a file 'filelist'\n";
+ next;
+ }
+
+ if (-r "${dir}/all-physical.sloc") {
+ # Skip already-analyzed directories; if a directory has been analyzed,
+ # we've already broken it down.
+ next;
+ }
+
+ if ($crossdups_okay) { # Cross-dups okay; forget the hash of previous files.
+ %previous_files = ();
+ }
+
+ # insert blank lines, in case we need to recover from a midway crash
+ if ($duplistfile) {
+ print DUPLIST "\n";
+ }
+
+
+ $dup_count = 0;
+
+ while (<FILELIST>) {
+ chomp;
+ $file = $_;
+ next if (!defined($file) || ($file eq ""));
+ $file_type = &get_file_type($file);
+ if ($file_type) {
+ &record_file_type($file, $file_type);
+ } else {
+ print STDERR "WARNING! No file type selected for $file\n";
+ }
+ }
+
+ # Done with straightline processing. Now we need to determine if
+ # the ".h" files we saw are C, OBJC, C++, or a mixture (!)
+ &convert_h_files();
+
+
+ # Done processing the directory. Close up shop so we're
+ # ready for the next directory.
+
+ close(FILELIST);
+ close_lang_lists();
+ reopen(""); # Close code file.
+
+ if ($dup_count > 50) {
+ print "Warning: in $dir, number of duplicates=$dup_count\n";
+ }
+
+}
+
+
diff --git a/c_count.c b/c_count.c
new file mode 100644
index 0000000..8581e55
--- /dev/null
+++ b/c_count.c
@@ -0,0 +1,225 @@
+/* c_count: given a list of C/C++/Java files on the command line,
+ count the SLOC in each one. SLOC = physical, non-comment lines.
+ This program knows about C++ and C comments (and how they interact),
+ and correctly ignores comment markers inside strings.
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+ Usage: Use in one of the following ways:
+ c_count # As filter
+ c_count [-f file] [list_of_files]
+ file: file with a list of files to count (if "-", read list from stdin)
+ list_of_files: list of files to count
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/* Modes */
+#define NORMAL 0
+#define INSTRING 1
+#define INCOMMENT 2
+
+/* Types of comments: */
+#define ANSIC_STYLE 0
+#define CPP_STYLE 1
+
+/* Not all C compilers support a boolean type, so for portability's sake,
+ we'll fake it. */
+#define BOOLEAN int
+#define TRUE 1
+#define FALSE 0
+
+
+/* Globals */
+long total_sloc;
+
+static BOOLEAN warn_embedded_newlines = FALSE;
+
+int peek(FILE *stream) {
+ int c = getc(stream);
+ ungetc(c, stream);
+ return c;
+}
+
+int ispeek(int c, FILE *stream) {
+ if (c == peek(stream)) {return 1;}
+ return 0;
+}
+
+long line_number;
+
+int getachar(FILE *stream) {
+/* Like getchar(), but keep track of line number. */
+ static BOOLEAN last_char_was_newline = 0;
+ int c;
+
+ c = getc(stream);
+ if (last_char_was_newline) line_number++;
+ if (c == '\n') last_char_was_newline=1;
+ else last_char_was_newline=0;
+ return c;
+}
+
+
+long sloc_count(char *filename, FILE *stream) {
+ /* Count the sloc in the program in stdin. */
+
+ long sloc = 0;
+
+ int sawchar = 0; /* Did you see a character on this line? */
+ int c;
+ int mode = NORMAL; /* NORMAL, INSTRING, or INCOMMENT */
+ int comment_type = ANSIC_STYLE; /* ANSIC_STYLE or CPP_STYLE */
+
+
+ /* The following implements a state machine with transitions; the
+ main state is "mode" and "comment_type", the transitions are
+ triggered by characters input. */
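+ /* A sketch of the main transitions implemented below (not exhaustive):
+ NORMAL -- '"' --> INSTRING (the quote itself counts as code)
+ NORMAL -- slash-star --> INCOMMENT (comment_type = ANSIC_STYLE)
+ NORMAL -- slash-slash --> INCOMMENT (comment_type = CPP_STYLE)
+ INSTRING -- '"' --> NORMAL (unless the quote is backslash-escaped)
+ INCOMMENT -- star-slash --> NORMAL (ANSIC_STYLE only)
+ INCOMMENT -- newline --> NORMAL (CPP_STYLE only)
+ A line adds one to sloc if any non-whitespace, non-comment character
+ was seen ("sawchar") before its newline. */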
+
+ while ( (c = getachar(stream)) != EOF) {
+ if (mode == NORMAL) {
+ if (c == '"') {sawchar=1; mode = INSTRING;}
+ else if (c == '\'') { /* Consume single-character 'xxxx' values */
+ sawchar=1;
+ c = getachar(stream);
+ if (c == '\\') c = getachar(stream);
+ do {
+ c = getachar(stream);
+ } while ((c != '\'') && (c != '\n') && (c != EOF));
+ } else if ((c == '/') && ispeek('*', stream)) {
+ c = getachar(stream);
+ mode = INCOMMENT;
+ comment_type = ANSIC_STYLE;
+ } else if ((c == '/') && ispeek('/', stream)) {
+ c = getachar(stream);
+ mode = INCOMMENT;
+ comment_type = CPP_STYLE;
+ } else if (!isspace(c)) {sawchar = 1;}
+ } else if (mode == INSTRING) {
+ /* We only count string lines with non-whitespace -- this is to
+ gracefully handle syntactically invalid programs.
+ You could argue that multiline strings with whitespace are
+ still executable and should be counted. */
+ if (!isspace(c)) sawchar = 1;
+ if (c == '"') {mode = NORMAL;}
+ else if ((c == '\\') && (ispeek('\"', stream) || ispeek('\\', stream))) {c = getachar(stream);}
+ else if ((c == '\\') && ispeek('\n', stream)) {c = getachar(stream);}
+ else if ((c == '\n') && warn_embedded_newlines) {
+ /* We found a bare newline in a string without preceding backslash. */
+ fprintf(stderr, "c_count WARNING - newline in string, line %ld, file %s\n", line_number, filename);
+ /* We COULD warn & reset mode to "Normal", but lots of code does this,
+ so we'll just depend on the warning for ending the program
+ in a string to catch syntactically erroneous programs. */
+ }
+ } else { /* INCOMMENT mode */
+ if ((c == '\n') && (comment_type == CPP_STYLE)) { mode = NORMAL;}
+ if ((comment_type == ANSIC_STYLE) && (c == '*') &&
+ ispeek('/', stream)) { c= getachar(stream); mode = NORMAL;}
+ }
+ if (c == '\n') {
+ if (sawchar) sloc++;
+ sawchar = 0;
+ }
+ }
+ /* We're done with the file. Handle EOF-without-EOL. */
+ if (sawchar) sloc++;
+ sawchar = 0;
+ if ((mode == INCOMMENT) && (comment_type == CPP_STYLE)) { mode = NORMAL;}
+
+ if (mode == INCOMMENT) {
+ fprintf(stderr, "c_count ERROR - terminated in comment in %s\n", filename);
+ } else if (mode == INSTRING) {
+ fprintf(stderr, "c_count ERROR - terminated in string in %s\n", filename);
+ }
+
+ return sloc;
+}
+
+
+void count_file(char *filename) {
+ long sloc;
+ FILE *stream;
+
+ stream = fopen(filename, "r");
+ if (!stream) {
+ fprintf(stderr, "c_count ERROR - cannot open file %s\n", filename);
+ return;
+ }
+ line_number = 1;
+ sloc = sloc_count(filename, stream);
+ total_sloc += sloc;
+ printf("%ld %s\n", sloc, filename);
+ fclose(stream);
+}
+
+char *read_a_line(FILE *file) {
+ /* Read a line in, and return a malloc'ed buffer with the line contents.
+ Any newline at the end is stripped.
+ If there's nothing left to read, returns NULL. */
+
+ /* We'll create a monstrously long buffer to make life easy for us: */
+ char buffer[10000];
+ char *returnval;
+ char *newlinepos;
+
+ returnval = fgets(buffer, sizeof(buffer), file);
+ if (returnval) {
+ newlinepos = buffer + strlen(buffer) - 1;
+ if (*newlinepos == '\n') {*newlinepos = '\0';};
+ return strdup(buffer);
+ } else {
+ return NULL;
+ }
+}
+
+
+int main(int argc, char *argv[]) {
+ long sloc;
+ int i;
+ FILE *file_list;
+ char *s;
+
+ total_sloc = 0;
+ line_number = 1;
+
+ if (argc <= 1) {
+ sloc = sloc_count("-", stdin);
+ printf("%ld %s\n", sloc, "-");
+ total_sloc += sloc;
+ } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) {
+ if (!strcmp (argv[2], "-")) {
+ file_list = stdin;
+ } else {
+ file_list = fopen(argv[2], "r");
+ }
+ if (file_list) {
+ while ((s = read_a_line(file_list))) {
+ count_file(s);
+ free(s);
+ }
+ }
+ } else {
+ for (i=1; i < argc; i++) { count_file(argv[i]); }
+ }
+ printf("Total:\n");
+ printf("%ld\n", total_sloc);
+ return 0; /* Report success */
+}
+
diff --git a/c_lines_environment.dat b/c_lines_environment.dat
new file mode 100644
index 0000000..e4a99a0
--- /dev/null
+++ b/c_lines_environment.dat
@@ -0,0 +1,98 @@
+ Temporary Project Name (* Project_Name,in 45 spaces *)
+0 (* QA_Switch *)
+1 (* Compare_Spec *)
+999 (* Line_Length *)
+1000 (* Exec_Lines *)
+500 (* Data_Lines *)
+60.0 (* Min_Percent *)
+0.0 (* Inc_Percent *)
+0 (* Display_File *)
+0 (* Intro_Msg *)
+P (* SLOC_Def *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* Refer to the source code file, 'c_lines.c', for further information *)
+(* pertaining to the INSTALLATION PROCEDURES and EXECUTION PROCEDURES of *)
+(* this code counting tool. *)
+(* *)
+(* Note: *)
+(* 1. The above user-defined parameters must be spaced one entry per line *)
+(* of this file. Numeric entries, with the exception of 'Inc_Percent', *)
+(* are of type Integer. *)
+(* *)
+(* 2. The 'c_lines_environment.dat' file must be co-located in the *)
+(* directory/path where the code counting tool is to be invoked. *)
+(* Failure to do so will result in the insertion of predefined default *)
+(* values for the entries contained herein. *)
+(* *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* USER DEFINEABLE PARAMETERS *)
+(* *)
+(* Project_Name -- Allows the user to insert the name of the Program or *)
+(* Project that the source code to be counted pertains. *)
+(* The Project_Name will appear within at the headings of *)
+(* of the 'c_outfile.dat' file produced upon execution *)
+(* of the 'c_lines' code counting tool. *)
+(* *)
+(* QA_Switch -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of programming language reserved word usage *)
+(* as found in the summary page of 'c_outfile.dat'. *)
+(* *)
+(* Compare_Spec -- Allows the user to control the case sensitivity of the *)
+(* code counting tool. A setting of '1' indicates that *)
+(* full case sensitive comparisons must be made. A setting*)
+(* of '0' allows valid comparisons to occur between like *)
+(* letters of upper and lower case. *)
+(* *)
+(* Line_Length -- Allows user to force the code counting tool to ignore *)
+(* information beyond 'Line_Length' characters per physical*)
+(* line of input. It is recommended that the length of *)
+(* the longest physical line to be read be used, i.e. 132. *)
+(* *)
+(* Exec_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with executable lines in exceedance *)
+(* of 'Exec_Lines' will be reported on the summary page of *)
+(* 'c_outfile.dat'. *)
+(* *)
+(* Data_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with data declaration lines in *)
+(* exceedance of 'Data_Lines' will be reported on the *)
+(* summary page of 'c_outfile.dat'. *)
+(* *)
+(* Min_Percent -- Allows the user to set a threshold whereby the number *)
+(* of files processed with a ratio of comments (whole & *)
+(* embedded) to SLOC (physical or logical) is less than *)
+(* 'Min_Percent'. *)
+(* *)
+(* Inc_Percent -- Allows the user to set a progress increment whereby a *)
+(* progress message will appear on the terminal screen *)
+(* during execution of the 'c_lines' tool. The progress *)
+(* message indicates that approximately 'Inc_Percent' of *)
+(* source code files to be processed have completed since *)
+(* the previous progress message appeared. The progress *)
+(* reporting is based solely on the number of files *)
+(* contained in 'c_list.dat'. Actual run-time progress *)
+(* is dependent on the relative size of each source code *)
+(* file and the user loading of the host platform machine. *)
+(* A setting of 0.0 will disable the reporting of the *)
+(* progress message. *)
+(* *)
+(* Display_File -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of last file to be processed within the *)
+(* c_list.dat file. *)
+(* *)
+(* Intro_Msg -- Allows the user to turn on '1' or to turn off '0' the *)
+(* output of the introduction message as the first page *)
+(* of the 'c_outfile.dat' file. *)
+(* *)
+(* SLOC_Def -- Allows the user to select the definition of a Source *)
+(* Line of Code (SLOC) to be used during the operation of *)
+(* the CodeCount tool. A setting of 'P' invokes the SLOC *)
+(* definition of Physical lines, a.k.a., non-comment, *)
+(* non-blank, physical lines of code or Deliverable Source *)
+(* Instructions (DSIs). A setting of 'L' invokes the SLOC *)
+(* definition of Logical lines, a.k.a., non-comment, *)
+(* non-blank, logical lines of code. *)
+(* *)
+(*---------------------------------------------------------------------------*)
diff --git a/c_outfile.dat b/c_outfile.dat
new file mode 100644
index 0000000..7d60dbc
--- /dev/null
+++ b/c_outfile.dat
@@ -0,0 +1 @@
+ERROR, unable to read c_list.dat file
diff --git a/cobol_count b/cobol_count
new file mode 100755
index 0000000..adda598
--- /dev/null
+++ b/cobol_count
@@ -0,0 +1,82 @@
+#!/usr/bin/perl
+# cobol_count - count physical lines of code.
+# Usage: cobol_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+# This is a trivial/naive program for scripts, etc.
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+ my $free_format = 0; # Support "free format" source code.
+
+ open (FILE, $file);
+ while (<FILE>) {
+ if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
+ if ($free_format) {
+ if (m/^\s*[\*\/]/) { } # Saw a comment.
+ elsif (m/\S/) {$sloc++;} # Saw a non-whitespace, it's SLOC.
+ } else {
+ if (m/^......[\*\/]/) {} # Saw a comment - marked in indicator area.
+ elsif (m/^[\*\/]/) {} # Saw a comment.
+ elsif (m/^........*\S/) {$sloc++;} # Saw a non-whitespace, it's SLOC.
+ }
+ }
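+ # A sketch of how the fixed-format branch above classifies (hypothetical) lines;
+ # columns 1-6 are the sequence area and column 7 is the indicator area:
+ #   "000100* PAYROLL REPORT"  -> comment ('*' in column 7)
+ #   "000200 MOVE A TO B."     -> SLOC (code at column 8 or later)
+ #   "000300"                  -> not counted (nothing past column 7)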
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ close (FILE);
+}
diff --git a/compute_all b/compute_all
new file mode 100755
index 0000000..20d5dcd
--- /dev/null
+++ b/compute_all
@@ -0,0 +1,87 @@
+#!/bin/sh
+# Computes filecounts and SLOC counts in the listed data directories
+# if they don't already exist.
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+if [ "$#" -eq 0 ]
+then
+ echo "Error: You must provide a list of directories to examine."
+ exit 1
+fi
+
+starting_dir=`pwd`
+
+for dir
+do
+ if [ -d "$dir" -a -r "${dir}/filelist" ]
+ then
+ cd "$dir"
+
+ if [ ! -r all.filecount ]
+ then
+ # Create all.filecount and all-physical.sloc; create them in
+ # separate files, so that we can safely restart if it stops in the middle:
+ > all.filecount.new
+ > all-physical.sloc.new
+ for listfile in *_list.dat
+ do
+ language=`echo $listfile | sed -e 's/_list\.dat$//'`
+
+ # Skip language "*" - this happens if there are NO source
+ # files in the given directory.
+ if [ "$language" = "*" ]; then
+ continue
+ fi
+
+ # Skip language "h" - it's counted in the ansic, cpp, and objc lists.
+ if [ "$language" = "h" ]; then
+ continue
+ fi
+
+ numfiles=`wc -l < $listfile | tr -d " "`
+ echo "$language $numfiles" >> all.filecount.new
+
+ # Ignore certain "languages" when counting SLOC:
+ case "$language"
+ in
+ not) true ;;
+ unknown) true ;;
+ zero) true ;;
+ dup) true ;;
+ auto) true ;;
+ *)
+ numsloc=`compute_sloc_lang $language "." | tr -d " "`
+ echo "$language $numsloc" >> all-physical.sloc.new
+ ;;
+ esac
+ done
+ mv all.filecount.new all.filecount
+ mv all-physical.sloc.new all-physical.sloc
+ fi
+
+ cd "$starting_dir"
+ fi
+done
+
diff --git a/compute_c_usc b/compute_c_usc
new file mode 100755
index 0000000..96ec59c
--- /dev/null
+++ b/compute_c_usc
@@ -0,0 +1,77 @@
+#!/bin/sh
+# Computes C and C++ code sizes for the list of directories given;
+# each directory must contain ansic_list.dat and cpp_list.dat.
+#
+# Change the following if it's the wrong place:
+
+C_DATA_ENV_FILE="/home/dwheeler/sloc/bin/c_lines_environment.dat"
+
+# Unfortunately, USC's code fails when c_list.dat is 0-length,
+# so we work around it.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+
+starting_dir=`pwd`
+
+for dir in $@
+do
+ if [ -d "$dir" -a -r "${dir}/filelist" ]
+ then
+ # ??? DEBUG: print out the name of each directory.
+ echo " $dir"
+ cd $dir
+ cp $C_DATA_ENV_FILE .
+
+ if [ -s ansic_list.dat ]
+ then
+ rm -f c_list.dat
+ ln -s ansic_list.dat c_list.dat
+ c_lines
+ mv c_outfile.dat ansic_outfile.dat.usc
+ extract-count < ansic_outfile.dat.usc
+ mv logical.sloc ansic-logical.sloc
+ mv physical.sloc ansic-physical.sloc.usc
+ else
+ echo 0 > ansic-logical.sloc
+ echo 0 > ansic-physical.sloc.usc
+ fi
+
+ if [ -s cpp_list.dat ]
+ then
+ rm -f c_list.dat
+ ln -s cpp_list.dat c_list.dat
+ c_lines
+ mv c_outfile.dat cpp_outfile.dat.usc
+ extract-count < cpp_outfile.dat.usc
+ mv logical.sloc cpp-logical.sloc
+ mv physical.sloc cpp-physical.sloc.usc
+ else
+ echo 0 > cpp-logical.sloc
+ echo 0 > cpp-physical.sloc.usc
+ fi
+
+ cd $starting_dir
+ fi
+done
diff --git a/compute_java_usc b/compute_java_usc
new file mode 100755
index 0000000..a0ffb25
--- /dev/null
+++ b/compute_java_usc
@@ -0,0 +1,59 @@
+#!/bin/sh
+# Computes Java sloc in the listed directories.
+# Each directory must contain java_list.dat.
+
+# Change the following if it's the wrong place:
+
+JAVA_DATA_ENV_FILE="/home/dwheeler/sloc/bin/java_lines_environment.dat"
+
+# Unfortunately, USC's code fails when its input list file is 0-length,
+# so we work around it.
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+
+starting_dir=`pwd`
+
+for dir in $@
+do
+ if [ -d "$dir" -a -r "${dir}/filelist" ]
+ then
+ cd $dir
+ cp $JAVA_DATA_ENV_FILE .
+
+ if [ -s java_list.dat ]
+ then
+ java_lines
+ extract-count < java_outfile.dat
+ mv logical.sloc java-logical.sloc
+ mv physical.sloc java-physical.sloc
+ else
+ echo 0 > java-logical.sloc
+ echo 0 > java-physical.sloc
+ fi
+
+ cd $starting_dir
+ fi
+done
diff --git a/compute_sloc_lang b/compute_sloc_lang
new file mode 100755
index 0000000..df635f7
--- /dev/null
+++ b/compute_sloc_lang
@@ -0,0 +1,66 @@
+#!/bin/sh
+# Computes SLOC in the listed directories.
+# The first parameter is the language; the remaining parameters are directories.
+# Creates the "${language}_outfile.dat" file and prints the total for each directory.
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+language="$1"
+shift
+
+starting_dir=`pwd`
+
+for dir in "$@"
+do
+ if [ -d "$dir" -a -r "${dir}/filelist" ]
+ then
+ cd "$dir"
+
+ if [ -s ${language}_list.dat ]
+ then
+ case "$language" in
+ ansic) use_c_count="y" ;;
+ cpp) use_c_count="y" ;;
+ java) use_c_count="y" ;;
+ yacc) use_c_count="y" ;;
+ cs) use_c_count="y" ;;
+ *) use_c_count="n" ;;
+ esac
+
+ case "$use_c_count" in
+ y) c_count -f ${language}_list.dat > ${language}_outfile.dat ;;
+ *) ${language}_count -f ${language}_list.dat > ${language}_outfile.dat
+ ;;
+ esac
+ tail -1 < ${language}_outfile.dat
+
+ else
+ rm -f ${language}_outfile.dat
+ echo 0
+ fi
+
+ cd "$starting_dir"
+ fi
+done
+
diff --git a/count_extensions b/count_extensions
new file mode 100755
index 0000000..1547d3f
--- /dev/null
+++ b/count_extensions
@@ -0,0 +1,56 @@
+#!/usr/bin/perl -w
+
+# Read from standard input a list of filenames, and
+# report a sorted list of extensions and filenames
+# (most common ones first).
+
+# The format is "name count", where "count" is the number of appearances.
+# "name" usually begins with a "." followed by the name of the extension.
+# In the case where the filename has no extension, the name begins with "/"
+# followed by the entire basename.
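+# For example, given this (hypothetical) input:
+#   src/foo.c
+#   src/bar.c
+#   src/README
+# the output would be:
+#   .c 2
+#   /README 1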
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+%extensions = ();
+
+while (<>) {
+ if (m/\.([^.\/]+)$/) {
+ $type = $1;
+ chomp($type);
+ $type = ".$type";
+ if (defined($extensions{$type})) { $extensions{$type}++; }
+ else { $extensions{$type} = 1; }
+ } elsif (m!/([^/]+)$!) {
+ $filename = $1;
+ chomp($filename);
+ $filename = "/$filename";
+ if (defined($extensions{$filename})) { $extensions{$filename}++; }
+ else { $extensions{$filename} = 1; }
+ }
+}
+
+foreach $entry (sort {$extensions{$b} <=> $extensions{$a}} keys %extensions) {
+ print "${entry} $extensions{$entry}\n";
+}
+
diff --git a/count_unknown_ext b/count_unknown_ext
new file mode 100755
index 0000000..cf18647
--- /dev/null
+++ b/count_unknown_ext
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# This reports a sorted list of the "unknown" file extensions
+# analyzed by a previous run of SLOCCount, most common first.
+# Use this to make sure that there isn't a common language type
+# that you are NOT counting.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+
+find ${HOME}/.slocdata -name 'unknown_list.dat' -exec cat {} \; | \
+ count_extensions | less
+
diff --git a/csh_count b/csh_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/csh_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' $@
+
diff --git a/dirmatch b/dirmatch
new file mode 100755
index 0000000..abe8d49
--- /dev/null
+++ b/dirmatch
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+# Dirmatch - read a list of directory name patterns from standard input, then,
+# for each pattern, print the entries of the current directory whose names match it.
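+# For example:
+#   echo gnome | dirmatch
+# prints a "=== gnome ===" header followed by the entries of the current
+# directory whose names contain "gnome" (case-insensitively).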
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+
+while read line
+do
+ echo
+ echo "=== $line ==="
+ ls -1 | grep -i "$line"
+done
+
diff --git a/driver.c b/driver.c
new file mode 100644
index 0000000..1e5159e
--- /dev/null
+++ b/driver.c
@@ -0,0 +1,110 @@
+/* driver: given a list of files on the command line,
+ count the SLOC in each one.
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+*/
+
+/* This is only included so that I can do some kinds of analysis
+ * separately on this file; normally this file is itself included: */
+#include "driver.h"
+
+
+
+void sloc_count(char *current_filename, FILE *stream) {
+ /* Count the sloc in the one file named "current_filename" in "stream",
+ * and add it to the total_sloc. */
+
+ filename = current_filename;
+ sloc = 0;
+ line_number = 1;
+ yyin = stream;
+
+ yylex();
+
+ total_sloc += sloc;
+}
+
+
+void count_file(char *current_filename) {
+ FILE *stream;
+
+ stream = fopen(current_filename, "r");
+ if (!stream) {
+ sloc = 0;
+ fprintf(stderr, "Error: Cannot open %s\n", current_filename);
+ return;
+ }
+ sloc_count(current_filename, stream);
+ printf("%ld %s\n", sloc, current_filename);
+ fclose(stream);
+}
+
+char *read_a_line(FILE *file) {
+ /* Read a line in, and return a malloc'ed buffer with the line contents.
+ Any newline at the end is stripped.
+ If there's nothing left to read, returns NULL. */
+
+ /* We'll create a monstrously long buffer to make life easy for us: */
+ char buffer[10000];
+ char *returnval;
+ char *newlinepos;
+
+ returnval = fgets(buffer, sizeof(buffer), file);
+ if (returnval) {
+ newlinepos = buffer + strlen(buffer) - 1;
+ if (*newlinepos == '\n') {*newlinepos = '\0';};
+ return strdup(buffer);
+ } else {
+ return NULL;
+ }
+}
+
+
+int main(int argc, char *argv[]) {
+ int i;
+ char *s;
+ FILE *file_list = NULL;
+
+ total_sloc = 0;
+
+ if (argc <= 1) {
+ sloc_count("-", stdin);
+ printf("%ld %s\n", sloc, "-");
+ } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) {
+ if (!strcmp (argv[2], "-")) {
+ file_list = stdin;
+ } else {
+ file_list = fopen(argv[2], "r");
+ }
+ if (file_list) {
+ while ((s = read_a_line(file_list))) {
+ count_file(s);
+ free(s);
+ }
+ }
+ } else {
+ for (i=1; i < argc; i++) { count_file(argv[i]); }
+ }
+ printf("Total:\n");
+ printf("%ld\n", total_sloc);
+ return 0; /* Report success */
+}
diff --git a/driver.h b/driver.h
new file mode 100644
index 0000000..ddeb331
--- /dev/null
+++ b/driver.h
@@ -0,0 +1,50 @@
+/* driver: given a list of files on the command line,
+ count the SLOC in each one.
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+*/
+
+#ifndef DRIVER_H
+#define DRIVER_H
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/* Not all C compilers support a boolean type, so for portability's sake,
+ we'll fake it. */
+#define BOOLEAN int
+#define TRUE 1
+#define FALSE 0
+
+
+/* Globals */
+unsigned long sloc; /* For current file */
+unsigned long line_number; /* Of current file */
+char *filename; /* Name of current file */
+
+unsigned long total_sloc; /* For all files seen */
+
+
+
+#endif
diff --git a/exp_count b/exp_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/exp_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' $@
+
diff --git a/extract-count b/extract-count
new file mode 100755
index 0000000..548b261
--- /dev/null
+++ b/extract-count
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+
+# Given USC output as standard input, find the number of physical and logical
+# SLOC, and save them in "physical.sloc" and "logical.sloc".
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$found =0;
+
+while (<>) {
+ if (m/^The Totals/) {
+ $found = 1;
+ last;
+ }
+}
+
+if (!$found) {
+ die "FAILED to find the totals section in code output.\n";
+}
+
+while (<>) {
+ # DEBUG: print "Read line: $_\n";
+ if (m/Physical/ || m/Logical/) {
+ s/^ *//;
+ ($total, $blank, $whole, $embedded, $compiler, $datadecl, $execinstruction,
+ $number_of_files, $sloc, $file_type, $sloc_definition ) = split(/[ \|]+/);
+ # DEBUG: print "Found match; file_type='${file_type}', sloc_definition='${sloc_definition}'\n";
+ if ($file_type =~ m/code/i) {
+ if ($sloc_definition =~ m/Physical/i) {
+ `echo $sloc > physical.sloc`
+ }
+ if ($sloc_definition =~ m/Logical/i) {
+ `echo $sloc > logical.sloc`
+ }
+ } elsif ($file_type =~ m/DATA/i) {
+ if ($number_of_files > 0) {
+ print STDERR "WARNING! NONZERO NUMBER OF DATA FILES!\n";
+ $pwd = `pwd`;
+ chomp($pwd);
+ print STDERR "Extract-count in directory ${pwd}.\n";
+ # The mere existence of this file is reason to check it out:
+ `echo $number_of_files > data.count`
+ }
+ }
+ }
+}
+
+
+__END__
+
+Here's a sample output (the beginning chopped off):
+
+ Temporary Project Name
+The Totals
+   Total    Blank  |     Comments       | Compiler    Data      Exec.   |  Number  |            File   SLOC
+   Lines    Lines  |  Whole   Embedded  | Direct.     Decl.     Instr.  | of Files |    SLOC    Type   Definition
+------------------------------------------------------------------------------------------------------------------------------------
+ 1938455   359776  | 146182     164828  |    0       12359    1420138  |   3172   | 1432497    CODE   Physical
+ 1938455   359776  | 146182     164828  |    0        6507     613235  |   3172   |  619742    CODE   Logical
+       0        0  |      0          0  |    0           0          0  |      0   |       0    DATA   Physical
+
diff --git a/extract_license b/extract_license
new file mode 100755
index 0000000..bde556e
--- /dev/null
+++ b/extract_license
@@ -0,0 +1,178 @@
+#!/usr/bin/perl
+# extract_license
+# Determine the license of a program, given 2 parameters:
+# (1) the directory containing the program's source code.
+# (2) the RPM spec file (which may be /dev/null)
+
+# This "regularizes" license names. For example,
+# BSD-style, BSDish, and BSD-like all become "BSD-like".
+# License names "sentence capitalization", e.g., "Freely distributable".
+# It also fixes a lot of errors in Red Hat spec files.
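+# A few more sample mappings (taken from the table further below):
+#   "GPL2" or "GNU GPL Version 2"   -> "GPL"
+#   "MIT/X Consortium" or "XFree86" -> "MIT"
+#   "Freeware"                      -> "Free"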
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$program_dir = shift;
+$rpm_spec = shift;
+
+$license = $copyright = "";
+
+%all_licenses = ();
+
+
+sub read_license_file() {
+ my $filename = shift(@_);
+ my $license = "";
+ if ((-s $filename) && open(LICENSE_FILE, "<$filename")) {
+ # TODO: detect even more licenses automatically.
+ # It's hard to detect BSD/MIT licenses,
+ # because these licenses make changes in the MIDDLE of their text.
+ # Thus, it's hard to avoid falsely detecting "almost" licenses.
+ # For example, ipf has license text that looks like a BSD/MIT license,
+ # but it's not even open source.
+ # However, we CAN detect many other kinds, so let's at least do that.
+ for ($i=1; $i < 9; $i++) {
+ $line = <LICENSE_FILE>;
+ if ($line =~ m/GNU GENERAL PUBLIC LICENSE/i) {$license = "GPL";}
+ elsif ($line =~ m/GNU LIBRARY GENERAL PUBLIC LICENSE/i) {$license = "LGPL";}
+ elsif ($line =~ m/GNU LESSER GENERAL PUBLIC LICENSE/i) {$license = "LGPL";}
+ elsif ($line =~ m/Mozilla PUBLIC LICENSE/i) {$license = "MPL";}
+ elsif ($line =~ m/Netscape PUBLIC LICENSE/i) {$license = "NPL";}
+ elsif ($line =~ m/IBM PUBLIC LICENSE/i) {$license = "IBM Public License";}
+ elsif ($line =~ m/\bApache Software License\b/i) {$license = "Apache";}
+ elsif ($line =~ m/\bThe "Artistic License"/i) {$license = "Artistic";}
+ }
+ close(LICENSE_FILE);
+ }
+ return $license;
+}
+
+sub add_license() {
+ # Add to license list "all_licenses" the license in the given file, if one.
+ my $filename = shift(@_);
+ my $license = &read_license_file($filename);
+ if ($license) { $all_licenses{$license} = 1; }
+}
+
+open(RPM_SPEC, "<$rpm_spec");
+
+while (<RPM_SPEC>) {
+ if (/^Copyright:(.*)/i) {$copyright=$1;}
+ if (/^License:(.*)/i) {$license=$1;}
+}
+close(RPM_SPEC);
+
+if (! $license) {
+ $license = $copyright;
+}
+
+# print "GOT: $license\n";
+
+if ( $license ) {
+ $_ = $license;
+
+ # Remove extraneous material in the middle of the license text.
+ s/ \(see: [^)]*\)//; # Delete parenthetical see: references.
+ s/, ?no warranties//; # "No warranties" not important for our purposes.
+ s/See COPYRIGHT file//i;
+ s/\b,?URW holds copyright\b//i;
+
+ # Clean up front and back.
+ s/^\s*//;
+ s/[ \t\.]*$//; # Delete trailing periods and blanks.
+
+ $_ = ucfirst($_); # Uppercase first character. Remove this line if need to.
+
+ if (/^GPL2?$/i || /^GNU$/ || /^GNU ?GPL *(Version 2)?$/i) {$_ = "GPL"};
+ if (/^Apache ?Group License$/i) {$_ = "Apache"};
+ if (/^Apacheish$/i || /^Apache-style$/i) {$_ = "Apache-like"};
+ if (/^Artistic$/i) {$_ = "Artistic"};
+ if (/^BSD$/i) {$_ = "BSD"};
+ if (/^BSDish$/i || /^BSD-style$/i || /^BSD-like$/) {$_ = "BSD-like"};
+ if (/^Distributable$/i) {$_ = "Distributable"};
+ if (/^Distributable ?\(BSD-like\)$/i) {$_ = "BSD-like"};
+ if (/^Freely ?Distributable$/i) {$_ = "Freely distributable"};
+ if (/^Free,no warranties.?$/i) {$_ = "Free"};
+ if (/^freeware. See COPYRIGHT file.?$/i) {$_ = "Free"};
+ if (/^freeware.?$/i) {$_ = "Free"};
+ if (/^GPLand Artistic$/i) {$_ = "GPL and Artistic"};
+ if (/^GPL ?or BSD$/i) {$_ = "GPL or BSD"};
+ if (/^GPL\/XFree86$/i) {$_ = "GPL/MIT"};
+ if (/^distributable- most of it GPL$/i) {$_ = "Distributable - mostly GPL"};
+ if (/^IBM ?Public License Version 1.0 -/i) {$_ = "IBM Public License"};
+ if (/^IBM ?Public License$/i) {$_ = "IBM Public License"};
+ if (/^MIT, ?freely distributable/i) {$_ = "MIT"};
+ if (/^MIT\/X ?Consortium$/i) {$_ = "MIT"};
+ if (/^Non[- ]commercial[- ]use[- ]only$/i) {$_ = "Non-commercial use only"};
+ if (/^Proprietary$/i) {$_ = "Proprietary"};
+ if (/^Public ?domain$/i) {$_ = "Public domain"};
+ if (/^Universityof Washington's Free-Fork License$/i)
+ {$_ = "U of Washington's Free-Fork License"};
+ if (/^W3CCopyright \(BSD like\)$/i) {$_ = "BSD-like"};
+ if (/^X ?Consortium[ -]?like$/i) {$_ = "MIT-like"};
+ if (/^XFree86$/i) {$_ = "MIT"};
+ if (/^W3C Copyright \(BSD[- ]like\)$/i) {$_ = "BSD-like"};
+
+ # Eliminate license if it isn't really a license.
+ if (/^2000Red Hat, Inc.?$/i) {$_ = ""};
+ if (/^OMRON ?Corporation, OMRON Software Co., Ltd.?$/i) {$_ = ""};
+ if (/^Copyright\s?.?\s?[1-9][0-9][0-9][0-9]/i) {$_ = ""}; # Not a license.
+ if (/^\(C\)\s?[1-9][0-9][0-9][0-9]/i) {$_ = ""}; # Not a license.
+ if (/^[1-9][0-9][0-9][0-9]\s/i) {$_ = ""}; # A date, not a license.
+
+ $license = $_;
+}
+
+if ($license) {
+ print $license;
+} else {
+ # The spec file didn't tell us anything. Let's look for files that tell us.
+ &add_license("${program_dir}/LICENSE");
+ &add_license("${program_dir}/COPYING");
+ &add_license("${program_dir}/COPYING.LIB");
+ &add_license("${program_dir}/Artistic");
+ &add_license("${program_dir}/COPYING-2.0");
+ &add_license("${program_dir}/COPYING.WTFPL");
+ &add_license("${program_dir}/COPYING.GPL");
+ &add_license("${program_dir}/COPYING.NEWLIB");
+ &add_license("${program_dir}/COPYING.kdb");
+ if (-s "${program_dir}/COPYING.BSD") { # Assume there's a BSD license.
+ $all_licenses{"BSD"} = 1;
+ }
+ if (-s "${program_dir}/COPYING.MIT") { # Assume there's an MIT license.
+ $all_licenses{"MIT"} = 1;
+ }
+
+ if (%all_licenses) {
+ $license = "";
+ foreach $license_fragment (sort(keys(%all_licenses))) {
+ $license .= "${license_fragment}, "
+ }
+ $license =~ s/, $//;
+ print $license;
+ }
+
+}
+print "\n";
+
diff --git a/f90_count b/f90_count
new file mode 100755
index 0000000..e618493
--- /dev/null
+++ b/f90_count
@@ -0,0 +1,81 @@
+#!/usr/bin/perl
+# f90_count - count physical lines of code in Fortran 90.
+# Usage: f90_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+# Ignores comment-only lines (where first nonblank character = !).
+# Lines beginning with !hpf$ or !omp$ are not comment lines.
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+
+ open (FILE, $file);
+ while (<FILE>) {
+ # a comment is m/^\s*!/
+ # an empty line is m/^\s*$/
+ # a HPF statement is m/^\s*!hpf\$/i
+ # an Open MP statement is m/^\s*!omp\$/i
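+ # For example (hypothetical free-form lines):
+ #   "! just a comment"    -> not counted
+ #   "  x = 1.0   ! init"  -> counted (code precedes the '!')
+ #   "!hpf$ independent"   -> counted (HPF directive)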
+ if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {$sloc++;}
+ }
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ $sloc = 0;
+ close (FILE);
+}
diff --git a/fortran_count b/fortran_count
new file mode 100755
index 0000000..4df1f32
--- /dev/null
+++ b/fortran_count
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+# fortran_count - count physical lines of code in Fortran 77.
+# Usage: fortran_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+# Ignores comment-only lines
+# (where column 1 character = C, c, *, or !,
+# or where a ! is preceded only by white space)
+# Lines beginning with !hpf$ or !omp$ are not comment lines either.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+
+ open (FILE, $file);
+ while (<FILE>) {
+ # a normal comment is m/^[c*!]/i
+ # a fancier comment is m/^\s+!/i
+ # an empty line is m/^\s*$/i
+ # a HPF statement is m/^[c*!]hpf\$/i
+ # an Open MP statement is m/^[c*!]omp\$/i
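+ # For example (hypothetical fixed-form lines):
+ #   "C     initialize"    -> not counted ('C' in column 1)
+ #   "      X = 1.0"       -> counted
+ #   "      X = 1.0  ! y"  -> counted (a trailing '!' comment does not disqualify it)
+ #   "!OMP$ PARALLEL DO"   -> counted (OpenMP directive)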
+ if (! m/^([c*!]|\s+!|\s*$)/i || m/^[c*!](hpf|omp)\$/i) {$sloc++;}
+ }
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ $sloc = 0;
+ close (FILE);
+}
diff --git a/generic_count b/generic_count
new file mode 100755
index 0000000..e4178eb
--- /dev/null
+++ b/generic_count
@@ -0,0 +1,77 @@
+#!/usr/bin/perl
+# generic_count - count physical lines of code, given a comment marker.
+# Usage: generic_count commentstart [-f file] [list_of_files]
+# commentstart: string that begins a comment (continuing til end-of-line)
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+# This is a trivial/naive program for scripts, etc.
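+# For example (using '#' as the comment marker, as the csh_count and exp_count
+# wrappers do):
+#   generic_count '#' install.sh setup.tcl
+# prints one "<sloc> <filename>" line per file, then "Total:" and the sum.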
+
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$commentstart = shift @ARGV;
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+    open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+
+ open (FILE, $file);
+ while (<FILE>) {
+ s/${commentstart}.*//;
+ if (m/\S/) {$sloc++;}
+ }
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ close (FILE);
+}
diff --git a/get_sloc b/get_sloc
new file mode 100755
index 0000000..f590a8e
--- /dev/null
+++ b/get_sloc
@@ -0,0 +1,544 @@
+#!/usr/bin/perl -w
+
+# get_sloc
+# Take a list of dirs, and get the SLOC or filecount data from them.
+# NOTE: The intended input data ignores zero-length files & ignores dups,
+# so if that's true for the input data, it'll be true for the output data!
+
+# This code works but is NOT cleaned up-- it basically grew like
+# topsy. Many of the variable names are misleading, as my needs for
+# output changed.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+
+
+# Default values for the effort estimation model; the model is
+# effort = $effort_factor * (KiloSLOC ** $effort_exponent).
+# The following numbers are for basic COCOMO:
+
+$effort_factor = 2.40;
+$effort_exponent = 1.05;
+$effort_estimation_message = "Basic COCOMO model,";
+
+$schedule_factor = 2.5;
+$schedule_exponent = 0.38;
+$schedule_estimation_message = "Basic COCOMO model,";
+
+# Average Salary / year.
+# Source: ComputerWorld, Sep. 4, 2000 Salary Survey,
+# average (U.S.) programmer/analyst salary.
+
+$person_cost = 56286.;
+
+# Overhead; the person cost is multiplied by this value to determine
+# true annual costs.
+
+$overhead = 2.4;
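+# Illustrative worked example with the defaults above (editor's note, not part
+# of the original source): for 100 KSLOC,
+#   effort   = 2.40 * (100 ** 1.05)        ~= 302.1 person-months (~25.2 person-years)
+#   schedule = 2.5 * (302.1 ** 0.38)       ~= 21.9 months (~1.8 years)
+#   cost     = (302.1 / 12) * 56286 * 2.4  ~= $3.4 million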
+
+@license_list = ( "GPL", "LGPL", "MIT", "BSD", "distributable",
+ "public domain", "MPL");
+
+%license_of = (); # input is name of program, output is license.
+
+$no_license_total = 0;
+
+%non_language_list = (
+ "dup" => 1,
+ "not" => 1,
+ "unknown" => 1,
+ "auto" => 1,
+ "zero" => 1,
+);
+
+%ignore_language_list = (
+ "makefile" => 1,
+ "sql" => 1,
+ "html" => 1,
+);
+
+# Default input values
+$dirs_in_stdin = 0; # 0: dirs to analyze as arguments, 1: in stdin
+
+# Default Output Values:
+
+$computing_sloc = 1; # 0= showing filecounts, 1= showing SLOC.
+$narrow = 1;
+$sort_by = "total"; # If empty, sort by name; else "total" or lang name.
+$show_effort = 0; # Show effort for each component?
+$break_line = 1; # Break up long lines into multiple lines?
+$show_non_lang = 0; # Show non-language counts?
+$one_program = 0; # Are all files part of a single program?
+$show_header = 1; # Show header?
+$show_footer = 1; # Show footer?
+
+
+# Global variables:
+
+@dirs = (); # Directories to examine
+
+%examined_directories = (); # Keys = Names of directories examined this run.
+
+# Subroutines.
+
+sub commify {
+# TODO: Needs to be internationalized.
+ my $text = reverse $_[0];
+ $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
+ return scalar reverse $text;
+}
+
+sub numformat {
+# Format number nicely with commas.
+ my $num = shift;
+ my $digits = shift;
+ return commify(sprintf("%0.${digits}f", $num));
+}
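+# For example (illustrative), commify(1234567) returns "1,234,567" and
+# numformat(1234567.891, 2) returns "1,234,567.89".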
+
+sub effort_person_months {
+ # Given the SLOC, reply an estimate of the number of person-months
+ # needed to develop it traditionally.
+ my $total_sloc = shift;
+ return ( ($effort_factor*(($total_sloc/1000.0)**$effort_exponent)));
+}
+
+sub estimate_schedule {
+ # Given the person-months, reply an estimate of the number of months
+ # needed to develop it traditionally.
+ my $person_months = shift;
+ return ($schedule_factor*($person_months**$schedule_exponent));
+}
+
+sub get_lang_total {
+ my $lang = shift;
+ if (defined($lang_total{$lang})) {return $lang_total{$lang}}
+ else {return 0;}
+}
+
+# MAIN PROGRAM
+
+
+# Process options (if any):
+
+if ($#ARGV < 0) {
+ print STDERR "Error! You must list at least one directory to process, or --stdin.\n";
+ exit(1);
+}
+
+while ((scalar (@ARGV) > 0) && ($ARGV[0] =~ m/^-/)) {
+ $arg = shift;
+ if ($arg eq "--") {last;}
+ elsif ($arg eq "--filecount") {$computing_sloc = 0;}
+ elsif ($arg eq "--filecounts") {$computing_sloc = 0;}
+ elsif ($arg eq "--sloc") {$computing_sloc = 1;}
+ elsif ($arg eq "--narrow") {$narrow = 1;}
+ elsif ($arg eq "--wide") {$narrow = 0;}
+ elsif ($arg eq "--break") {$break_line = 1;}
+ elsif ($arg eq "--nobreak") {$break_line = 0;}
+ elsif ($arg eq "--sort") {$sort_by = shift;} # Must be "total" or a lang.
+ elsif ($arg eq "--nosort") {$sort_by = "";}
+ elsif ($arg eq "--showother") {$show_non_lang = 1;}
+ elsif ($arg eq "--noshowother") {$show_non_lang = 0;}
+ elsif ($arg eq "--oneprogram") {$one_program = 1;}
+ elsif ($arg eq "--noheader") {$show_header = 0;}
+ elsif ($arg eq "--nofooter") {$show_footer = 0;}
+ elsif ($arg eq "--addlang") { $lang = shift;
+ if (!defined($ignore_language_list{$lang})) {
+ die "Sorry, but $lang isn't ignored"; };
+ delete $ignore_language_list{$lang}; }
+ elsif ($arg eq "--addlangall") { %ignore_language_list = (); }
+ elsif ($arg eq "--effort") {$effort_factor = (shift)*1.0;
+ $effort_exponent = (shift)*1.0;
+ $effort_estimation_message = "effort model"}
+ elsif ($arg eq "--schedule") {$schedule_factor = (shift)*1.0;
+ $schedule_exponent = (shift)*1.0;
+ $schedule_estimation_message = "schedule model"}
+ elsif ($arg eq "--personcost") {$person_cost = (shift)*1.0;}
+ elsif ($arg eq "--overhead") {$overhead = (shift)*1.0;}
+ elsif ($arg eq "--stdin") {$dirs_in_stdin = 1;}
+ else {die "Unknown option: $arg\n";}
+}
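+# Illustrative invocations (editor's note; "data/*" stands for directories that
+# each contain a 'filelist' plus the all-physical.sloc / all.filecount data read below):
+#   get_sloc data/*                                  # per-directory SLOC, sorted by total
+#   get_sloc --filecount --wide data/*               # file counts, one column per language
+#   get_sloc --effort 3.0 1.12 --oneprogram data/*   # custom effort model, single program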
+
+
+# Determine the languages to show:
+
+
+if ($computing_sloc) { $show_non_lang = 0; }
+
+if (!$show_non_lang) {
+ # Add the non_language_list to the ignored languages.
+ foreach $langname (keys(%non_language_list))
+ {$ignore_language_list{$langname} = 1;}
+}
+
+
+%lang_total = ();
+%license_total = ();
+
+@data_lines = ();
+
+$sloc = 0;
+$total_sloc = 0;
+$total_lang_sloc = 0;
+$grand_total_sloc = 0;
+$grand_total_lang_sloc = 0;
+$effort = 0.0;
+$grand_total_effort = 0.0;
+$grand_schedule = 0.0;
+
+if (!$narrow) {
+ # Ouch! To accurately determine the column positions and names,
+ # without "pre-knowing" them, we need to look through the data.
+ # So, we'll do it twice. This isn't efficient - if needed,
+ # speed it up by rewriting this to do it in-memory.
+ while (defined($_ = <DATAFILE>)) {
+ ($lang, $sloc) = split;
+ next if ( (!defined($lang)) || (!defined($sloc)) );
+ next if ($ignore_language_list{$lang});
+ $lang_total{$lang} = 0;
+ }
+}
+
+
+# Print the header.
+if ($show_header) {
+if ($narrow) {
+ if ($computing_sloc) { print "SLOC\t"; }
+ else { print "#Files\t"; }
+  if ($show_effort) {print "P.M.\t";}
+ print "Directory\t";
+ if ($computing_sloc) { print "SLOC-by-Language (Sorted)"; }
+ else { print "#Files-by-Language (Sorted)"; }
+ print "\n";
+} else {
+ if ($computing_sloc) { print "SLOC\t"; }
+ else { print "#Files\t"; }
+ if ($show_effort) {print "P.M.\t";}
+ printf "%-22s\t", "Dir";
+ foreach $lang (keys(%lang_total)) {
+ print "$lang\t";
+ $lang_total{$lang} = 0;
+ };
+ print "\n";
+}
+}
+
+if ($dirs_in_stdin == 1) {
+ while (defined($dir = <STDIN>)) {
+ chomp ($dir);
+ push (@dirs, $dir);
+ }
+}
+
+while ($dir = shift) {
+ push (@dirs, $dir);
+}
+
+
+foreach $dir (@dirs) {
+ if (! -d "$dir") {
+ # print "Skipping non-directory $dir\n";
+ next;
+ }
+
+ # Skip previously-examined directories.
+ if ($examined_directories{$dir}) {
+ # print "Skipping already-examined directory $dir\n";
+ next;
+ }
+ $examined_directories{$dir} = 1;
+
+ if (! -r "${dir}/filelist") {
+ # print "Skipping directory $dir; it doesn't contain a file 'filelist'\n";
+ next;
+ }
+
+
+ $simplename = $dir;
+ $simplename =~ s!^.*\/!!;
+ $total_sloc = 0;
+  $total_lang_sloc = 0;
+  $interesting_lang_sloc = 0; # reset so a missing --sort language doesn't reuse a stale value
+ $preceding_entry = 0;
+
+ $line = "";
+ %lang_data = ();
+
+ if ($computing_sloc) {
+ $filename = "${dir}/all-physical.sloc";
+ } else {
+ $filename = "${dir}/all.filecount";
+ }
+ if (open(DATAFILE, "<$filename")) {
+ while (defined($_ = <DATAFILE>)) {
+ ($lang, $sloc) = split;
+ next if ( (!defined($lang)) || (!defined($sloc)) );
+ next if ($ignore_language_list{$lang});
+ if ($narrow) { if ($sloc) {$lang_data{$lang} = $sloc;}}
+ else { $line .= "${sloc}\t"; }
+ if ($lang eq $sort_by) {$interesting_lang_sloc = $sloc;}
+ $total_sloc += $sloc;
+ $total_lang_sloc += $sloc unless ($non_language_list{$lang});
+ $lang_total{$lang} += $sloc;
+ }
+ close(DATAFILE);
+ } else {
+    print STDERR "Error opening $filename\n";
+ }
+ if ($narrow) {
+ # For narrow view, sort the language entries.
+ foreach $entry (sort {$lang_data{$b} <=> $lang_data{$a}} keys %lang_data){
+ if ($preceding_entry) {$line .= ",";}
+ $preceding_entry = 1;
+ $line .= "${entry}=${lang_data{$entry}}";
+ }
+ if (!$preceding_entry) {$line .= "(none)";}
+ }
+
+ $grand_total_sloc += $total_sloc;
+ $grand_total_lang_sloc += $total_lang_sloc;
+
+ $effort = effort_person_months($total_sloc);
+ $grand_total_effort += $effort;
+
+ $schedule = estimate_schedule($effort);
+ if ($schedule > $grand_schedule) {
+ $grand_schedule = $schedule; # The longest leg wins.
+ }
+
+ $displayed_effort = "";
+ if ($show_effort) { $displayed_effort = sprintf "%.2f\t", $effort; }
+ if ($narrow) {
+ $displayed_name = "$simplename";
+ } else {
+ $displayed_name = sprintf "%-22s\t", $simplename;
+ }
+
+ # Add to the corresponding license, if the license is known.
+ $license = "";
+ if (open(LICENSE_FILE, "<${dir}/PROGRAM_LICENSE")) {
+ $license = <LICENSE_FILE>;
+ chomp($license);
+ close(LICENSE_FILE);
+ if ($license) {
+ $license_of{$simplename} = $license; # Hash currently unused.
+ if (! defined($license_total{$license})) {
+ $license_total{$license} = 0;
+ }
+ $license_total{$license} = $license_total{$license} + $total_sloc;
+ }
+ } else {
+ $no_license_total += $total_sloc;
+ }
+
+ if ($narrow) {
+ $line = sprintf "%-7d %s%-15s %-s\n", $total_sloc, $displayed_effort,
+ $simplename, $line;
+ if ($break_line && (length($line) > 77)) { # Break up long line.
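+      # Break at the first comma at or beyond column 72 and indent the
+      # continuation, so long per-language breakdowns stay readable.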
+ $line =~ s/(.{71})([^,]*),(.*)/$1$2,\n $3/;
+ }
+ if ($license) {
+ $line .= " [$license]\n";
+ }
+ } else {
+ $line = "${total_sloc}\t${displayed_effort}${displayed_name}${line}\n";
+ }
+ if ($sort_by) {
+ if ($sort_by eq "total") {$line = "$total_sloc\t$line";}
+ else {$line = "$interesting_lang_sloc\t$line";}
+ $data_lines[$#data_lines+1] = $line; # Add to data lines.
+ } else {
+ print $line; # No sort - print immediately for speed.
+ }
+
+}
+
+if ($sort_by) {
+ # Print sorted version. This is a little inefficient, but for
+ # only a few hundred or thousand values it doesn't matter.
+ @sorted_data_lines = sort { ($b =~ /^(\d+)/)[0] <=> ($a =~ /^(\d+)/)[0] }
+ @data_lines;
+ foreach $line (@sorted_data_lines) {
+ $short_line = $line;
+ $short_line =~ s/^[^\t]*\t//; # Remove sort field.
+ print $short_line;
+ }
+}
+
+
+if (! $show_footer) {exit(0);}
+if ($grand_total_sloc == 0) {
+ print "SLOC total is zero, no further analysis performed.\n";
+ exit(1);
+}
+
+# Print the footer.
+if ($narrow) {
+ print "\n";
+ print "\n";
+ print "Totals grouped by language (dominant language first):\n";
+ # If you don't want the list sorted by size of language, just do:
+ # foreach $lang (@language_list) {
+ foreach $lang (sort {&get_lang_total($b) <=> &get_lang_total($a) } keys(%lang_total) ) {
+ $percent = get_lang_total($lang) * 100.0 / $grand_total_sloc;
+ if ($percent > 0.0) {
+ printf "%-9s %9d (%.2f%%)\n", $lang . ":", $lang_total{$lang}, $percent;
+ }
+ };
+
+ if ($show_non_lang) {
+ # The previous list showed "non-languages", so now we'll show only the
+    # data associated with a normal language:
+ print "\n";
+ print "\n";
+ foreach $lang (sort {&get_lang_total($b) <=> &get_lang_total($a) } keys(%lang_total)) {
+ next if (defined($non_language_list{$lang}));
+ $percent = $lang_total{$lang} * 100.0 / $grand_total_lang_sloc;
+ if ($percent > 0.0) {
+ printf "%-9s %9d (%.2f%%)\n", $lang . ":", $lang_total{$lang}, $percent;
+ }
+ };
+ }
+
+} else { # Not narrow.
+
+ print "$grand_total_sloc\t";
+ if ($show_effort) {printf "%.2f\t", $grand_total_effort;}
+
+ printf "%-22s", "Totals";
+ foreach $lang (keys(%lang_total)) {
+ print "\t$lang_total{$lang}";
+ };
+
+ print "\t";
+ if ($show_effort) {printf "\t";}
+ printf "%-22s\t", "Percentages";
+ foreach $lang (keys(%lang_total)) {
+ $percent = $lang_total{$lang} * 100.0 / $grand_total_sloc;
+ printf "\t%0.2f", $percent;
+ };
+ print "\n";
+
+ print "\t";
+ if ($show_effort) {printf "\t";}
+ printf "%-22s\t", "Code Percentages";
+ foreach $lang (keys(%lang_total)) {
+ next if (defined($non_language_list{$lang}));
+ $percent = $lang_total{$lang} * 100.0 / $grand_total_lang_sloc;
+ printf "\t%0.2f", $percent;
+ };
+ print "\n";
+}
+
+print "\n";
+print "\n";
+
+
+if (%license_total) {
+ # We have license info on something, so if there's anything
+ # unallocated, add that to the list.
+ if ($no_license_total) {
+ $license_total{"Not listed"} = $no_license_total;
+ }
+ print "Licenses:\n";
+ foreach $license (sort {$license_total{$b} <=> $license_total{$a} } keys(%license_total)) {
+ $percent = $license_total{$license} * 100.0 / $grand_total_sloc;
+ if ($percent > 0.0) {
+ printf "%9d (%.2f%%) %s\n", $license_total{$license}, $percent, $license;
+ }
+ };
+ print "\n";
+ print "\n";
+
+ print "Percentage of Licenses containing selected key phrases:\n";
+ %license_phrase = ();
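+  # Each license's SLOC total is credited to every phrase from @license_list that
+  # appears (word-bounded, case-insensitive) in the license string; e.g. a license
+  # recorded as "GPL version 2" counts toward the "GPL" bucket (comment added for clarity).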
+ foreach $license (keys(%license_total)) {
+ foreach $phrase (@license_list) {
+ if ($license =~ m/\b$phrase\b/i) {
+ if (!defined($license_phrase{$phrase})) {$license_phrase{$phrase} = 0;}
+ $license_phrase{$phrase} = $license_phrase{$phrase} +
+ $license_total{$license};
+ }
+ }
+ }
+
+ foreach $phrase (sort {$license_phrase{$b} <=> $license_phrase{$a} } keys(%license_phrase)) {
+ $percent = $license_phrase{$phrase} * 100.0 / $grand_total_sloc;
+ if ($percent > 0.0) {
+ printf "%9d (%.2f%%) %s\n", $license_phrase{$phrase}, $percent, $phrase;
+ }
+ };
+
+}
+
+
+print "\n";
+print "\n";
+
+if ($computing_sloc) {
+ if ($one_program) {
+ # If it's one program, override the grand total of effort
+ # and the schedule calculations by using the total SLOC.
+
+ $grand_total_effort = effort_person_months($grand_total_sloc);
+ $grand_schedule = estimate_schedule($grand_total_effort);
+ }
+ printf "Total Physical Source Lines of Code (SLOC) = %s\n",
+ commify($grand_total_sloc);
+
+ printf "Development Effort Estimate, Person-Years (Person-Months) = %s (%s)\n",
+ numformat($grand_total_effort/12.0, 2),
+ numformat($grand_total_effort, 2);
+ print " ($effort_estimation_message " .
+ "Person-Months = $effort_factor * (KSLOC**$effort_exponent))\n";
+
+ printf "Schedule Estimate, Years (Months) = %s (%s)\n",
+ numformat($grand_schedule/12.0, 2),
+ numformat($grand_schedule, 2);
+ print " ($schedule_estimation_message " .
+ "Months = $schedule_factor * (person-months**$schedule_exponent))\n";
+
+ # Don't show this if there are multiple programs, because the computation
+ # is essentially meaningless: after the "smaller" projects have completed,
+ # the longest one would keep going:
+ if ($one_program && ($grand_schedule > 0.0)) {
+ printf "Estimated Average Number of Developers (Effort/Schedule) = %s\n",
+ numformat($grand_total_effort / $grand_schedule, 2);
+ }
+
+
+ $value = ($grand_total_effort / 12.0) * $person_cost * $overhead;
+ printf "Total Estimated Cost to Develop = \$ %s\n",
+ numformat($value, 0);
+ printf " (average salary = \$%s/year, overhead = %0.2f).\n",
+ commify($person_cost), $overhead;
+
+} else {
+print "Total Number of Files = $grand_total_sloc\n";
+print "Total Number of Source Code Files = $grand_total_lang_sloc\n";
+}
+print "SLOCCount, Copyright (C) 2001-2004 David A. Wheeler\n";
+print "SLOCCount is Open Source Software/Free Software, licensed under the GNU GPL.\n";
+print "SLOCCount comes with ABSOLUTELY NO WARRANTY, and you are welcome to\n";
+print "redistribute it under certain conditions as specified by the GNU GPL license;\n";
+print "see the documentation for details.\n";
+print "Please credit this data as \"generated using David A. Wheeler's 'SLOCCount'.\"\n";
+
diff --git a/get_sloc_details b/get_sloc_details
new file mode 100755
index 0000000..56ef45a
--- /dev/null
+++ b/get_sloc_details
@@ -0,0 +1,103 @@
+#!/usr/bin/perl -w
+
+# get_sloc_details
+# Take a list of dirs, and get the detailed SLOC entries for every file.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+
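+# print_data: read one per-language "*_outfile.dat" report in $dir and re-emit each
+# per-file entry as "count<TAB>language<TAB>directory<TAB>filename" on stdout
+# (descriptive comment added for clarity).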
+sub print_data
+{
+ my $dir = shift;
+ my $langfile = shift;
+ my $saw_total = 0;
+ my $filename = "${dir}/${langfile}";
+ my $lang = $langfile;
+ $lang =~ s/_outfile\.dat$//;
+
+ open(RAWDATA, "<$filename") ||
+ return;
+ # die "Can't open file in $dir for language $lang.\n";
+
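+  # The assembly counter's per-file lines carry an extra parenthesized field between
+  # the count and the file name, so they get their own pattern (comment added for clarity).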
+ if ($lang eq "asm") {
+ while (<RAWDATA>) {
+ if (m/^Total:/) {
+ $saw_total = 1;
+ last;
+ }
+ chomp;
+ if (m/^([0-9]+)\s+\([^\)]+\)\s+(.*)/) {
+ print "$1\t$lang\t$dir\t$2\n";
+ } else {
+ print STDERR "Warning: file $filename has unexpected text: $_\n";
+ }
+ }
+ } else {
+ while (<RAWDATA>) {
+ if (m/^Total:/) {
+ $saw_total = 1;
+ last;
+ }
+ chomp;
+ if (m/^([0-9]+)\s+(.*)/) {
+ print "$1\t$lang\t$dir\t$2\n";
+ } else {
+ print STDERR "Warning: file $filename has unexpected text: $_\n";
+ }
+ }
+ }
+ close(RAWDATA);
+ if (! $saw_total) {
+ print STDERR "Warning! No 'Total' line in $filename.\n";
+ }
+}
+
+# MAIN PROGRAM
+
+
+if ($#ARGV < 0) {
+ print STDERR "Error! You must list at least one directory to process.\n";
+ exit(1);
+}
+
+
+while ( $dir = shift ) {
+
+ if (! -d "$dir") {
+ # print "Skipping non-directory $dir\n";
+ next;
+ }
+
+ if (! -r "${dir}/filelist") {
+ # print "Skipping directory $dir; it doesn't contain a file 'filelist'\n";
+ next;
+ }
+
+ opendir(DATADIR, $dir) || die "can't opendir $dir: $!";
+ @outfiles = grep { /outfile\.dat$/ } readdir(DATADIR);
+ closedir DATADIR;
+ foreach $langfile (@outfiles) {
+ print_data($dir, $langfile);
+ }
+
+}
+
diff --git a/haskell_count b/haskell_count
new file mode 100755
index 0000000..21299aa
--- /dev/null
+++ b/haskell_count
@@ -0,0 +1,122 @@
+#!/usr/bin/perl -w
+# haskell_count - count physical lines of code
+# Strips out {- .. -} and -- comments and counts the rest.
+# Pragmas, {-#...}, are counted as SLOC.
+# BUG: Doesn't handle strings with embedded block comment markers gracefully.
+# In practice, that shouldn't be a problem.
+# Usage: haskell_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
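+# Illustrative invocation (editor's example; the file names are hypothetical):
+#   haskell_count Main.hs Tutorial.lhs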
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+
+
+
+
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
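+# determine_lit_type: detect how a literate (.lhs) file marks its code
+# (descriptive comment added for clarity):
+#   returns 2 for LaTeX style (\begin{code} ... \end{code} blocks),
+#   returns 1 for "Bird track" style (code lines beginning with ">"),
+#   returns 0 if neither marker is found (treated as non-literate).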
+sub determine_lit_type {
+ my ($file) = @_;
+
+ open (FILE, $file);
+ while (<FILE>) {
+ if (m/^\\begin{code}/) { close FILE; return 2; }
+ if (m/^>\s/) { close FILE; return 1; }
+ }
+
+ return 0;
+}
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+ my $incomment = 0;
+ my ($literate, $inlitblock) = (0,0);
+
+ $literate = 1 if $file =~ /\.lhs$/;
+ if($literate) { $literate = determine_lit_type($file) }
+
+ open (FILE, $file);
+ while (<FILE>) {
+ if ($literate == 1) {
+ if (!s/^>//) { s/.*//; }
+ } elsif ($literate == 2) {
+ if ($inlitblock) {
+ if (m/^\\end{code}/) { s/.*//; $inlitblock = 0; }
+ } elsif (!$inlitblock) {
+ if (m/^\\begin{code}/) { s/.*//; $inlitblock = 1; }
+ else { s/.*//; }
+ }
+ }
+
+ if ($incomment) {
+ if (m/\-\}/) { s/^.*?\-\}//; $incomment = 0;}
+ else { s/.*//; }
+ }
+ if (!$incomment) {
+ s/--.*//;
+ s!{-[^#].*?-}!!g;
+ if (m/{-/ && (!m/{-#/)) {
+ s/{-.*//;
+ $incomment = 1;
+ }
+ }
+ if (m/\S/) {$sloc++;}
+ }
+ print "$sloc $file\n";
+  if ($incomment) {print "ERROR: ended in comment in $file\n";}
+ $total_sloc += $sloc;
+ $sloc = 0;
+ $incomment = 0;
+ close (FILE);
+}
diff --git a/java_lines_environment.dat b/java_lines_environment.dat
new file mode 100644
index 0000000..56897e9
--- /dev/null
+++ b/java_lines_environment.dat
@@ -0,0 +1,98 @@
+ Temporary Project Name (* Project_Name,in 45 spaces *)
+0 (* QA_Switch *)
+1 (* Compare_Spec *)
+999 (* Line_Length *)
+1000 (* Exec_Lines *)
+500 (* Data_Lines *)
+60.0 (* Min_Percent *)
+0.0 (* Inc_Percent *)
+0 (* Display_File *)
+0 (* Intro_Msg *)
+P (* SLOC_Def *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* Refer to the source code file, 'java_lines.c', for further information *)
+(* pertaining to the INSTALLATION PROCEDURES and EXECUTION PROCEDURES of *)
+(* this code counting tool. *)
+(* *)
+(* Note: *)
+(* 1. The above user-defined parameters must be spaced one entry per line *)
+(* of this file. Numeric entries, with the exception of 'Inc_Percent', *)
+(* are of type Integer. *)
+(* *)
+(* 2. The 'java_lines_environment.dat' file must be co-located in the *)
+(* directory/path whereas the code counting tool is to be invoked. *)
+(* Failure to do so will result in the insertion of predefined default *)
+(* values for the entries contained herein. *)
+(* *)
+(*---------------------------------------------------------------------------*)
+(* *)
+(* USER DEFINEABLE PARAMETERS *)
+(* *)
+(* Project_Name -- Allows the user to insert the name of the Program or *)
+(* Project that the source code to be counted pertains. *)
+(* The Project_Name will appear within at the headings of *)
+(* of the 'java_outfile.dat' file produced upon execution *)
+(* of the 'java_lines' code counting tool. *)
+(* *)
+(* QA_Switch -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of programming language reserve word usage *)
+(* as found in the summary page of 'java_outfile.dat'. *)
+(* *)
+(* Compare_Spec -- Allows the user to control the case sensitivity of the *)
+(* code counting tool. A setting of '1' indicates that *)
+(* full case sensitive comparisons must be made. A setting*)
+(* of '0' allows valid comparisons to occur between like *)
+(* letters of upper and lower case. *)
+(* *)
+(* Line_Length -- Allows user to force the code counting tool to ignore *)
+(* information beyond 'Line_Length' characters per physical*)
+(* line of input. It is recommended that the length of *)
+(* the longest physical line to be read be used, i.e. 132. *)
+(* *)
+(* Exec_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with executable lines in exceedance *)
+(* of 'Exec_Lines' will be reported on the summary page of *)
+(* 'java_outfile.dat'. *)
+(* *)
+(* Data_Lines -- Allows the user to set a threshold whereby the number *)
+(* of files processed with data declaration lines in *)
+(* exceedance of 'Data_Lines' will be reported on the *)
+(* summary page of 'java_outfile.dat'. *)
+(* *)
+(* Min_Percent -- Allows the user to set a threshold whereby the number *)
+(* of files processed with a ratio of comments (whole & *)
+(* embedded) to SLOC (physical or logical) is less than *)
+(* 'Min_Percent'. *)
+(* *)
+(* Inc_Percent -- Allows the user to set a progress increment whereby a *)
+(* progress message will appear on the terminal screen *)
+(* during execution of the 'java_lines' tool. The progress*)
+(* message indicates that approximately 'Inc_Percent' of *)
+(* source code files to be processed have completed since *)
+(* the previous progress message appeared. The progress *)
+(* reporting is based solely on the number of files *)
+(* contained in 'java_list.dat'. Actual run-time progress *)
+(* is dependent on the relative size of each source code *)
+(* file and the user loading of the host platform machine. *)
+(* A setting of 0.0 will disable the reporting of the *)
+(* progress message. *)
+(* *)
+(* Display_File -- Allows the user to turn on '1' or to turn off '0' the *)
+(* reporting of last file to be processed within the *)
+(* java_list.dat file. *)
+(* *)
+(* Intro_Msg -- Allows the user to turn on '1' or to turn off '0' the *)
+(* output of the introduction message as the first page *)
+(* of the 'java_outfile.dat' file. *)
+(* *)
+(* SLOC_Def -- Allows the user to select the definition of a Source *)
+(* Line of Code (SLOC) to be used during the operation of *)
+(* the CodeCount tool. A setting of 'P' envokes the SLOC *)
+(* definition of Physical lines, a.k.a., non-comment, *)
+(* non-blank, physical lines of code or Deliverable Source *)
+(* Instructions (DSIs). A setting of 'L' envokes the SLOC *)
+(* definition of Logical lines, a.k.a., non-comment, *)
+(* non-blank, logical lines of code. *)
+(* *)
+(*---------------------------------------------------------------------------*)
diff --git a/jsp_count.c b/jsp_count.c
new file mode 100644
index 0000000..42cb2af
--- /dev/null
+++ b/jsp_count.c
@@ -0,0 +1,1787 @@
+/* A lexical scanner generated by flex */
+
+/* Scanner skeleton version:
+ * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $
+ */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+
+#include <stdio.h>
+#include <unistd.h>
+
+
+/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
+#ifdef c_plusplus
+#ifndef __cplusplus
+#define __cplusplus
+#endif
+#endif
+
+
+#ifdef __cplusplus
+
+#include <stdlib.h>
+
+/* Use prototypes in function declarations. */
+#define YY_USE_PROTOS
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+#if __STDC__
+
+#define YY_USE_PROTOS
+#define YY_USE_CONST
+
+#endif /* __STDC__ */
+#endif /* ! __cplusplus */
+
+#ifdef __TURBOC__
+ #pragma warn -rch
+ #pragma warn -use
+#include <io.h>
+#include <stdlib.h>
+#define YY_USE_CONST
+#define YY_USE_PROTOS
+#endif
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+
+#ifdef YY_USE_PROTOS
+#define YY_PROTO(proto) proto
+#else
+#define YY_PROTO(proto) ()
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index. If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN yy_start = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START ((yy_start - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart( yyin )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#define YY_BUF_SIZE 16384
+
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+
+extern int yyleng;
+extern FILE *yyin, *yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+/* The funky do-while in the following #define is used to turn the definition
+ * into a single C statement (which needs a semi-colon terminator).  This
+ * avoids problems with code like:
+ *
+ * if ( condition_holds )
+ * yyless( 5 );
+ * else
+ * do_something_else();
+ *
+ * Prior to using the do-while the compiler would get upset at the
+ * "else" because it interpreted the "if" statement as being all
+ * done when it reached the ';' after the yyless() call.
+ */
+
+/* Return all but the first 'n' matched characters back to the input stream. */
+
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ *yy_cp = yy_hold_char; \
+ YY_RESTORE_YY_MORE_OFFSET \
+ yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+
+#define unput(c) yyunput( c, yytext_ptr )
+
+/* The following is because we cannot portably get our hands on size_t
+ * (without autoconf's help, which isn't available because we want
+ * flex-generated scanners to compile on their own).
+ */
+typedef unsigned int yy_size_t;
+
+
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+ };
+
+static YY_BUFFER_STATE yy_current_buffer = 0;
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ */
+#define YY_CURRENT_BUFFER yy_current_buffer
+
+
+/* yy_hold_char holds the character lost when yytext is formed. */
+static char yy_hold_char;
+
+static int yy_n_chars; /* number of characters read into yy_ch_buf */
+
+
+int yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 1; /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow yywrap()'s to do buffer switches
+ * instead of setting up a fresh yyin. A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void yyrestart YY_PROTO(( FILE *input_file ));
+
+void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer ));
+void yy_load_buffer_state YY_PROTO(( void ));
+YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size ));
+void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b ));
+void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file ));
+void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b ));
+#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer )
+
+YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size ));
+YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str ));
+YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len ));
+
+static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
+static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
+static void yy_flex_free YY_PROTO(( void * ));
+
+#define yy_new_buffer yy_create_buffer
+
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! yy_current_buffer ) \
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
+ yy_current_buffer->yy_is_interactive = is_interactive; \
+ }
+
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! yy_current_buffer ) \
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
+ yy_current_buffer->yy_at_bol = at_bol; \
+ }
+
+#define YY_AT_BOL() (yy_current_buffer->yy_at_bol)
+
+
+#define yywrap() 1
+#define YY_SKIP_YYWRAP
+typedef unsigned char YY_CHAR;
+FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+typedef int yy_state_type;
+extern char *yytext;
+#define yytext_ptr yytext
+static yyconst short yy_nxt[][11] =
+ {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0
+ },
+
+ {
+ 9, 10, 11, 12, 10, 13, 10, 14, 10, 15,
+ 10
+ },
+
+ {
+ 9, 10, 11, 12, 10, 13, 10, 14, 10, 15,
+ 10
+ },
+
+ {
+ 9, 16, 16, 17, 16, 16, 16, 16, 18, 16,
+ 16
+ },
+
+ {
+ 9, 16, 16, 17, 16, 16, 16, 16, 18, 16,
+ 16
+
+ },
+
+ {
+ 9, 19, 19, 20, 19, 19, 19, 19, 21, 19,
+ 19
+ },
+
+ {
+ 9, 19, 19, 20, 19, 19, 19, 19, 21, 19,
+ 19
+ },
+
+ {
+ 9, 22, 22, 23, 22, 24, 22, 22, 22, 22,
+ 22
+ },
+
+ {
+ 9, 22, 22, 23, 22, 24, 22, 22, 22, 22,
+ 22
+ },
+
+ {
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9,
+ -9
+
+ },
+
+ {
+ 9, 25, 25, -10, 25, -10, 25, 25, 25, -10,
+ 25
+ },
+
+ {
+ 9, -11, -11, -11, -11, -11, -11, -11, -11, -11,
+ -11
+ },
+
+ {
+ 9, -12, -12, -12, -12, -12, -12, -12, -12, -12,
+ -12
+ },
+
+ {
+ 9, -13, -13, -13, -13, -13, -13, -13, -13, -13,
+ -13
+ },
+
+ {
+ 9, -14, -14, -14, -14, -14, -14, -14, -14, -14,
+ -14
+
+ },
+
+ {
+ 9, -15, -15, -15, 26, -15, 27, -15, -15, -15,
+ -15
+ },
+
+ {
+ 9, -16, -16, -16, -16, -16, -16, -16, -16, -16,
+ -16
+ },
+
+ {
+ 9, -17, -17, -17, -17, -17, -17, -17, -17, -17,
+ -17
+ },
+
+ {
+ 9, -18, -18, -18, -18, -18, -18, -18, 28, -18,
+ -18
+ },
+
+ {
+ 9, -19, -19, -19, -19, -19, -19, -19, -19, -19,
+ -19
+
+ },
+
+ {
+ 9, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+ -20
+ },
+
+ {
+ 9, -21, -21, -21, -21, -21, -21, -21, 29, -21,
+ -21
+ },
+
+ {
+ 9, 30, 30, -22, 30, -22, 30, 30, 30, 30,
+ 30
+ },
+
+ {
+ 9, -23, -23, -23, -23, -23, -23, -23, -23, -23,
+ -23
+ },
+
+ {
+ 9, -24, -24, -24, -24, -24, -24, -24, -24, -24,
+ -24
+
+ },
+
+ {
+ 9, 25, 25, -25, 25, -25, 25, 25, 25, -25,
+ 25
+ },
+
+ {
+ 9, -26, -26, -26, -26, -26, -26, -26, 31, -26,
+ -26
+ },
+
+ {
+ 9, -27, -27, -27, -27, -27, -27, -27, 32, -27,
+ -27
+ },
+
+ {
+ 9, -28, -28, -28, -28, -28, -28, -28, -28, -28,
+ 33
+ },
+
+ {
+ 9, -29, -29, -29, -29, -29, 34, -29, -29, -29,
+ -29
+
+ },
+
+ {
+ 9, 30, 30, -30, 30, -30, 30, 30, 30, 30,
+ 30
+ },
+
+ {
+ 9, -31, -31, -31, -31, -31, -31, -31, 35, -31,
+ -31
+ },
+
+ {
+ 9, -32, -32, -32, -32, -32, -32, -32, 36, -32,
+ -32
+ },
+
+ {
+ 9, -33, -33, -33, -33, -33, -33, -33, -33, -33,
+ -33
+ },
+
+ {
+ 9, -34, -34, -34, -34, -34, -34, -34, -34, -34,
+ 37
+
+ },
+
+ {
+ 9, -35, -35, -35, -35, -35, -35, -35, -35, -35,
+ -35
+ },
+
+ {
+ 9, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+ -36
+ },
+
+ {
+ 9, -37, -37, -37, -37, -37, -37, -37, -37, -37,
+ -37
+ },
+
+ } ;
+
+
+static yy_state_type yy_get_previous_state YY_PROTO(( void ));
+static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state ));
+static int yy_get_next_buffer YY_PROTO(( void ));
+static void yy_fatal_error YY_PROTO(( yyconst char msg[] ));
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ yytext_ptr = yy_bp; \
+ yyleng = (int) (yy_cp - yy_bp); \
+ yy_hold_char = *yy_cp; \
+ *yy_cp = '\0'; \
+ yy_c_buf_p = yy_cp;
+
+#define YY_NUM_RULES 17
+#define YY_END_OF_BUFFER 18
+static yyconst short int yy_accept[38] =
+ { 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 18, 6,
+ 1, 4, 5, 7, 7, 10, 9, 10, 13, 12,
+ 13, 14, 15, 16, 6, 0, 0, 0, 0, 14,
+ 0, 0, 8, 0, 2, 3, 11
+ } ;
+
+static yyconst int yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 4, 5, 1, 1, 6, 1, 1, 7,
+ 1, 1, 1, 1, 8, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 9,
+ 1, 10, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+static yy_state_type yy_last_accepting_state;
+static char *yy_last_accepting_cpos;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *yytext;
+#line 1 "jsp_count.l"
+#define INITIAL 0
+#line 2 "jsp_count.l"
+
+/*
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler and Bob Brown.
+This is a tweaked version by Bob Brown, derived from
+David A. Wheeler's pascal_count.l.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+Bob Brown's website is: http://www.openeye.com/rlb
+*/
+
+#include "driver.h"
+
+#define YY_NO_UNPUT
+
+/* 1 if we saw a non-comment, non-whitespace char on this line */
+int saw_char = 0;
+static void count(void);
+
+#define chtml 1
+
+#define cjsp 2
+
+#define string 3
+
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap YY_PROTO(( void ));
+#else
+extern int yywrap YY_PROTO(( void ));
+#endif
+#endif
+
+#ifndef YY_NO_UNPUT
+static void yyunput YY_PROTO(( int c, char *buf_ptr ));
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen YY_PROTO(( yyconst char * ));
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput YY_PROTO(( void ));
+#else
+static int input YY_PROTO(( void ));
+#endif
+#endif
+
+#if YY_STACK_USED
+static int yy_start_stack_ptr = 0;
+static int yy_start_stack_depth = 0;
+static int *yy_start_stack = 0;
+#ifndef YY_NO_PUSH_STATE
+static void yy_push_state YY_PROTO(( int new_state ));
+#endif
+#ifndef YY_NO_POP_STATE
+static void yy_pop_state YY_PROTO(( void ));
+#endif
+#ifndef YY_NO_TOP_STATE
+static int yy_top_state YY_PROTO(( void ));
+#endif
+
+#else
+#define YY_NO_PUSH_STATE 1
+#define YY_NO_POP_STATE 1
+#define YY_NO_TOP_STATE 1
+#endif
+
+#ifdef YY_MALLOC_DECL
+YY_MALLOC_DECL
+#else
+#if __STDC__
+#ifndef __cplusplus
+#include <stdlib.h>
+#endif
+#else
+/* Just try to get by without declaring the routines. This will fail
+ * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
+ * or sizeof(void*) != sizeof(int).
+ */
+#endif
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO (void) fwrite( yytext, yyleng, 1, yyout )
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( yy_current_buffer->yy_is_interactive ) \
+ { \
+ int c = '*', n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \
+ && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" );
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL int yylex YY_PROTO(( void ))
+#endif
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+YY_DECL
+ {
+ register yy_state_type yy_current_state;
+ register char *yy_cp = NULL, *yy_bp = NULL;
+ register int yy_act;
+
+#line 46 "jsp_count.l"
+
+ line_number = 1;
+ saw_char = 0;
+ BEGIN(INITIAL);
+
+
+ if ( yy_init )
+ {
+ yy_init = 0;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! yy_start )
+ yy_start = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( ! yy_current_buffer )
+ yy_current_buffer =
+ yy_create_buffer( yyin, YY_BUF_SIZE );
+
+ yy_load_buffer_state();
+ }
+
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = yy_c_buf_p;
+
+ /* Support of yytext. */
+ *yy_cp = yy_hold_char;
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yy_start;
+yy_match:
+ while ( (yy_current_state = yy_nxt[yy_current_state][yy_ec[YY_SC_TO_UI(*yy_cp)]]) > 0 )
+ {
+ if ( yy_accept[yy_current_state] )
+ {
+ yy_last_accepting_state = yy_current_state;
+ yy_last_accepting_cpos = yy_cp;
+ }
+
+ ++yy_cp;
+ }
+
+ yy_current_state = -yy_current_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+
+do_action: /* This label is used only to access EOF actions. */
+
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = yy_hold_char;
+ yy_cp = yy_last_accepting_cpos + 1;
+ yy_current_state = yy_last_accepting_state;
+ goto yy_find_action;
+
+case 1:
+YY_RULE_SETUP
+#line 51 "jsp_count.l"
+/* Do nothing */
+ YY_BREAK
+case 2:
+YY_RULE_SETUP
+#line 52 "jsp_count.l"
+{ BEGIN(chtml); }
+ YY_BREAK
+case 3:
+YY_RULE_SETUP
+#line 53 "jsp_count.l"
+{ BEGIN(cjsp); }
+ YY_BREAK
+case 4:
+YY_RULE_SETUP
+#line 54 "jsp_count.l"
+{ count(); }
+ YY_BREAK
+case 5:
+YY_RULE_SETUP
+#line 56 "jsp_count.l"
+{saw_char = 1; BEGIN(string);}
+ YY_BREAK
+case 6:
+YY_RULE_SETUP
+#line 58 "jsp_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 59 "jsp_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 62 "jsp_count.l"
+{ BEGIN(INITIAL); }
+ YY_BREAK
+case 9:
+YY_RULE_SETUP
+#line 63 "jsp_count.l"
+{ count(); }
+ YY_BREAK
+case 10:
+YY_RULE_SETUP
+#line 64 "jsp_count.l"
+/* no-op */
+ YY_BREAK
+case 11:
+YY_RULE_SETUP
+#line 66 "jsp_count.l"
+{ BEGIN(INITIAL); }
+ YY_BREAK
+case 12:
+YY_RULE_SETUP
+#line 67 "jsp_count.l"
+{ count(); }
+ YY_BREAK
+case 13:
+YY_RULE_SETUP
+#line 68 "jsp_count.l"
+/* no-op */
+ YY_BREAK
+case 14:
+YY_RULE_SETUP
+#line 70 "jsp_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 15:
+YY_RULE_SETUP
+#line 71 "jsp_count.l"
+{
+ fprintf(stderr, "Warning: newline in string - file %s, line %ld\n",
+ filename, line_number);
+ count();
+ BEGIN(INITIAL); /* Switch back; this at least limits damage */
+ }
+ YY_BREAK
+case 16:
+YY_RULE_SETUP
+#line 77 "jsp_count.l"
+{ BEGIN(INITIAL);}
+ YY_BREAK
+case 17:
+YY_RULE_SETUP
+#line 79 "jsp_count.l"
+ECHO;
+ YY_BREAK
+case YY_STATE_EOF(INITIAL):
+case YY_STATE_EOF(chtml):
+case YY_STATE_EOF(cjsp):
+case YY_STATE_EOF(string):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = yy_hold_char;
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between yy_current_buffer and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ yy_n_chars = yy_current_buffer->yy_n_chars;
+ yy_current_buffer->yy_input_file = yyin;
+ yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state();
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++yy_c_buf_p;
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = yy_c_buf_p;
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer() )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ yy_did_buffer_switch_on_eof = 0;
+
+ if ( yywrap() )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yy_c_buf_p =
+ yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state();
+
+ yy_cp = yy_c_buf_p;
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ yy_c_buf_p =
+ &yy_current_buffer->yy_ch_buf[yy_n_chars];
+
+ yy_current_state = yy_get_previous_state();
+
+ yy_cp = yy_c_buf_p;
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of yylex */
+
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH -
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+
+static int yy_get_next_buffer()
+ {
+ register char *dest = yy_current_buffer->yy_ch_buf;
+ register char *source = yytext_ptr;
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( yy_current_buffer->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ yy_current_buffer->yy_n_chars = yy_n_chars = 0;
+
+ else
+ {
+ int num_to_read =
+ yy_current_buffer->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+#ifdef YY_USES_REJECT
+ YY_FATAL_ERROR(
+"input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
+#else
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = yy_current_buffer;
+
+ int yy_c_buf_p_offset =
+ (int) (yy_c_buf_p - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ int new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ yy_flex_realloc( (void *) b->yy_ch_buf,
+ b->yy_buf_size + 2 );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = 0;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = yy_current_buffer->yy_buf_size -
+ number_to_move - 1;
+#endif
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
+ yy_n_chars, num_to_read );
+
+ yy_current_buffer->yy_n_chars = yy_n_chars;
+ }
+
+ if ( yy_n_chars == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart( yyin );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ yy_current_buffer->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ yy_n_chars += number_to_move;
+ yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
+ yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+
+ yytext_ptr = &yy_current_buffer->yy_ch_buf[0];
+
+ return ret_val;
+ }
+
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+static yy_state_type yy_get_previous_state()
+ {
+ register yy_state_type yy_current_state;
+ register char *yy_cp;
+
+ yy_current_state = yy_start;
+
+ for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
+ {
+ yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)];
+ if ( yy_accept[yy_current_state] )
+ {
+ yy_last_accepting_state = yy_current_state;
+ yy_last_accepting_cpos = yy_cp;
+ }
+ }
+
+ return yy_current_state;
+ }
+
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ */
+
+#ifdef YY_USE_PROTOS
+static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state )
+#else
+static yy_state_type yy_try_NUL_trans( yy_current_state )
+yy_state_type yy_current_state;
+#endif
+ {
+ register int yy_is_jam;
+ register char *yy_cp = yy_c_buf_p;
+
+ yy_current_state = yy_nxt[yy_current_state][1];
+ yy_is_jam = (yy_current_state <= 0);
+
+ if ( ! yy_is_jam )
+ {
+ if ( yy_accept[yy_current_state] )
+ {
+ yy_last_accepting_state = yy_current_state;
+ yy_last_accepting_cpos = yy_cp;
+ }
+ }
+
+ return yy_is_jam ? 0 : yy_current_state;
+ }
+
+
+#ifndef YY_NO_UNPUT
+#ifdef YY_USE_PROTOS
+static void yyunput( int c, register char *yy_bp )
+#else
+static void yyunput( c, yy_bp )
+int c;
+register char *yy_bp;
+#endif
+ {
+ register char *yy_cp = yy_c_buf_p;
+
+ /* undo effects of setting up yytext */
+ *yy_cp = yy_hold_char;
+
+ if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
+ { /* need to shift things up to make room */
+ /* +2 for EOB chars. */
+ register int number_to_move = yy_n_chars + 2;
+ register char *dest = &yy_current_buffer->yy_ch_buf[
+ yy_current_buffer->yy_buf_size + 2];
+ register char *source =
+ &yy_current_buffer->yy_ch_buf[number_to_move];
+
+ while ( source > yy_current_buffer->yy_ch_buf )
+ *--dest = *--source;
+
+ yy_cp += (int) (dest - source);
+ yy_bp += (int) (dest - source);
+ yy_current_buffer->yy_n_chars =
+ yy_n_chars = yy_current_buffer->yy_buf_size;
+
+ if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
+ YY_FATAL_ERROR( "flex scanner push-back overflow" );
+ }
+
+ *--yy_cp = (char) c;
+
+
+ yytext_ptr = yy_bp;
+ yy_hold_char = *yy_cp;
+ yy_c_buf_p = yy_cp;
+ }
+#endif /* ifndef YY_NO_UNPUT */
+
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput()
+#else
+static int input()
+#endif
+ {
+ int c;
+
+ *yy_c_buf_p = yy_hold_char;
+
+ if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+ /* This was really a NUL. */
+ *yy_c_buf_p = '\0';
+
+ else
+ { /* need more input */
+ int offset = yy_c_buf_p - yytext_ptr;
+ ++yy_c_buf_p;
+
+ switch ( yy_get_next_buffer() )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ yyrestart( yyin );
+
+ /* fall through */
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap() )
+ return EOF;
+
+ if ( ! yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput();
+#else
+ return input();
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yy_c_buf_p = yytext_ptr + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */
+ *yy_c_buf_p = '\0'; /* preserve yytext */
+ yy_hold_char = *++yy_c_buf_p;
+
+
+ return c;
+ }
+#endif /* YY_NO_INPUT */
+
+#ifdef YY_USE_PROTOS
+void yyrestart( FILE *input_file )
+#else
+void yyrestart( input_file )
+FILE *input_file;
+#endif
+ {
+ if ( ! yy_current_buffer )
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
+
+ yy_init_buffer( yy_current_buffer, input_file );
+ yy_load_buffer_state();
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
+#else
+void yy_switch_to_buffer( new_buffer )
+YY_BUFFER_STATE new_buffer;
+#endif
+ {
+ if ( yy_current_buffer == new_buffer )
+ return;
+
+ if ( yy_current_buffer )
+ {
+ /* Flush out information for old buffer. */
+ *yy_c_buf_p = yy_hold_char;
+ yy_current_buffer->yy_buf_pos = yy_c_buf_p;
+ yy_current_buffer->yy_n_chars = yy_n_chars;
+ }
+
+ yy_current_buffer = new_buffer;
+ yy_load_buffer_state();
+
+ /* We don't actually know whether we did this switch during
+ * EOF (yywrap()) processing, but the only time this flag
+ * is looked at is after yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ yy_did_buffer_switch_on_eof = 1;
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_load_buffer_state( void )
+#else
+void yy_load_buffer_state()
+#endif
+ {
+ yy_n_chars = yy_current_buffer->yy_n_chars;
+ yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
+ yyin = yy_current_buffer->yy_input_file;
+ yy_hold_char = *yy_c_buf_p;
+ }
+
+
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_create_buffer( FILE *file, int size )
+#else
+YY_BUFFER_STATE yy_create_buffer( file, size )
+FILE *file;
+int size;
+#endif
+ {
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
+
+ yy_init_buffer( b, file );
+
+ return b;
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_delete_buffer( YY_BUFFER_STATE b )
+#else
+void yy_delete_buffer( b )
+YY_BUFFER_STATE b;
+#endif
+ {
+ if ( ! b )
+ return;
+
+ if ( b == yy_current_buffer )
+ yy_current_buffer = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ yy_flex_free( (void *) b->yy_ch_buf );
+
+ yy_flex_free( (void *) b );
+ }
+
+
+
+#ifdef YY_USE_PROTOS
+void yy_init_buffer( YY_BUFFER_STATE b, FILE *file )
+#else
+void yy_init_buffer( b, file )
+YY_BUFFER_STATE b;
+FILE *file;
+#endif
+
+
+ {
+ yy_flush_buffer( b );
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+#if YY_ALWAYS_INTERACTIVE
+ b->yy_is_interactive = 1;
+#else
+#if YY_NEVER_INTERACTIVE
+ b->yy_is_interactive = 0;
+#else
+ b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
+#endif
+#endif
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_flush_buffer( YY_BUFFER_STATE b )
+#else
+void yy_flush_buffer( b )
+YY_BUFFER_STATE b;
+#endif
+
+ {
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == yy_current_buffer )
+ yy_load_buffer_state();
+ }
+
+
+#ifndef YY_NO_SCAN_BUFFER
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size )
+#else
+YY_BUFFER_STATE yy_scan_buffer( base, size )
+char *base;
+yy_size_t size;
+#endif
+ {
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return 0;
+
+ b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+ b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = 0;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ yy_switch_to_buffer( b );
+
+ return b;
+ }
+#endif
+
+
+#ifndef YY_NO_SCAN_STRING
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str )
+#else
+YY_BUFFER_STATE yy_scan_string( yy_str )
+yyconst char *yy_str;
+#endif
+ {
+ int len;
+ for ( len = 0; yy_str[len]; ++len )
+ ;
+
+ return yy_scan_bytes( yy_str, len );
+ }
+#endif
+
+
+#ifndef YY_NO_SCAN_BYTES
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len )
+#else
+YY_BUFFER_STATE yy_scan_bytes( bytes, len )
+yyconst char *bytes;
+int len;
+#endif
+ {
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ int i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = len + 2;
+ buf = (char *) yy_flex_alloc( n );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+ for ( i = 0; i < len; ++i )
+ buf[i] = bytes[i];
+
+ buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = yy_scan_buffer( buf, n );
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+ }
+#endif
+
+
+#ifndef YY_NO_PUSH_STATE
+#ifdef YY_USE_PROTOS
+static void yy_push_state( int new_state )
+#else
+static void yy_push_state( new_state )
+int new_state;
+#endif
+ {
+ if ( yy_start_stack_ptr >= yy_start_stack_depth )
+ {
+ yy_size_t new_size;
+
+ yy_start_stack_depth += YY_START_STACK_INCR;
+ new_size = yy_start_stack_depth * sizeof( int );
+
+ if ( ! yy_start_stack )
+ yy_start_stack = (int *) yy_flex_alloc( new_size );
+
+ else
+ yy_start_stack = (int *) yy_flex_realloc(
+ (void *) yy_start_stack, new_size );
+
+ if ( ! yy_start_stack )
+ YY_FATAL_ERROR(
+ "out of memory expanding start-condition stack" );
+ }
+
+ yy_start_stack[yy_start_stack_ptr++] = YY_START;
+
+ BEGIN(new_state);
+ }
+#endif
+
+
+#ifndef YY_NO_POP_STATE
+static void yy_pop_state()
+ {
+ if ( --yy_start_stack_ptr < 0 )
+ YY_FATAL_ERROR( "start-condition stack underflow" );
+
+ BEGIN(yy_start_stack[yy_start_stack_ptr]);
+ }
+#endif
+
+
+#ifndef YY_NO_TOP_STATE
+static int yy_top_state()
+ {
+ return yy_start_stack[yy_start_stack_ptr - 1];
+ }
+#endif
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+#ifdef YY_USE_PROTOS
+static void yy_fatal_error( yyconst char msg[] )
+#else
+static void yy_fatal_error( msg )
+char msg[];
+#endif
+ {
+ (void) fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+ }
+
+
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ yytext[yyleng] = yy_hold_char; \
+ yy_c_buf_p = yytext + n; \
+ yy_hold_char = *yy_c_buf_p; \
+ *yy_c_buf_p = '\0'; \
+ yyleng = n; \
+ } \
+ while ( 0 )
+
+
+/* Internal utility routines. */
+
+#ifndef yytext_ptr
+#ifdef YY_USE_PROTOS
+static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
+#else
+static void yy_flex_strncpy( s1, s2, n )
+char *s1;
+yyconst char *s2;
+int n;
+#endif
+ {
+ register int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
+ }
+#endif
+
+#ifdef YY_NEED_STRLEN
+#ifdef YY_USE_PROTOS
+static int yy_flex_strlen( yyconst char *s )
+#else
+static int yy_flex_strlen( s )
+yyconst char *s;
+#endif
+ {
+ register int n;
+ for ( n = 0; s[n]; ++n )
+ ;
+
+ return n;
+ }
+#endif
+
+
+#ifdef YY_USE_PROTOS
+static void *yy_flex_alloc( yy_size_t size )
+#else
+static void *yy_flex_alloc( size )
+yy_size_t size;
+#endif
+ {
+ return (void *) malloc( size );
+ }
+
+#ifdef YY_USE_PROTOS
+static void *yy_flex_realloc( void *ptr, yy_size_t size )
+#else
+static void *yy_flex_realloc( ptr, size )
+void *ptr;
+yy_size_t size;
+#endif
+ {
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return (void *) realloc( (char *) ptr, size );
+ }
+
+#ifdef YY_USE_PROTOS
+static void yy_flex_free( void *ptr )
+#else
+static void yy_flex_free( ptr )
+void *ptr;
+#endif
+ {
+ free( ptr );
+ }
+
+#if YY_MAIN
+int main()
+ {
+ yylex();
+ return 0;
+ }
+#endif
+#line 79 "jsp_count.l"
+
+
+#include "driver.c"
+
+static void count(void)
+{
+ if ( saw_char ) {
+ sloc++;
+ saw_char = 0;
+ }
+ line_number++;
+}
diff --git a/jsp_count.l b/jsp_count.l
new file mode 100644
index 0000000..a9ad5d6
--- /dev/null
+++ b/jsp_count.l
@@ -0,0 +1,90 @@
+%{
+
+/*
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler and Bob Brown.
+This is a tweaked version by Bob Brown, derived from
+David A. Wheeler's pascal_count.l.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+Bob Brown's website is: http://www.openeye.com/rlb
+*/
+
+#include "driver.h"
+
+#define YY_NO_UNPUT
+
+/* 1 if we saw a non-comment, non-whitespace char on this line */
+int saw_char = 0;
+static void count(void);
+
+%}
+
+%option noyywrap
+
+SPACE [ \t\n\r\f]
+
+%x chtml
+%x cjsp
+%x string
+
+%%
+ line_number = 1;
+ saw_char = 0;
+ BEGIN(INITIAL);
+
+[ \t\r\f] /* Do nothing */
+"<!--" { BEGIN(chtml); }
+"<%--" { BEGIN(cjsp); }
+\n { count(); }
+
+\" {saw_char = 1; BEGIN(string);}
+
+[^ \t\r\f(\n<"][^<\n"]* {saw_char = 1;}
+. {saw_char = 1;}
+
+
+<chtml>"-->" { BEGIN(INITIAL); }
+<chtml>\n { count(); }
+<chtml>. /* no-op */
+
+<cjsp>"--%>" { BEGIN(INITIAL); }
+<cjsp>\n { count(); }
+<cjsp>. /* no-op */
+
+<string>[^\"\n]+ {saw_char = 1;}
+<string>\n {
+ fprintf(stderr, "Warning: newline in string - file %s, line %ld\n",
+ filename, line_number);
+ count();
+ BEGIN(INITIAL); /* Switch back; this at least limits damage */
+ }
+<string>\" { BEGIN(INITIAL);}
+
+%%
+
+#include "driver.c"
+
+static void count(void)
+{
+ if ( saw_char ) {
+ sloc++;
+ saw_char = 0;
+ }
+ line_number++;
+}
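
A rough illustration of the rules above (a sketch only: it assumes the shared driver.c front end accepts file names on the command line the way the other counters in this package do). Only lines carrying markup outside the <!-- ... --> and <%-- ... --%> comment delimiters should be counted:

    # Hypothetical check; the file name and contents are illustrative only.
    printf '%s\n' '<%-- JSP comment: not counted --%>' \
                  '<!-- HTML comment: not counted -->' \
                  '<html>' '<p>Hello</p>' '</html>' > sample.jsp
    jsp_count sample.jsp   # the three markup lines count; the two comment lines do not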
diff --git a/lex_count b/lex_count
new file mode 100755
index 0000000..f0adfaf
--- /dev/null
+++ b/lex_count
@@ -0,0 +1,70 @@
+#!/usr/bin/perl
+# lex_count
+# Usage: lex_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+
+ $sloc = `lexcount1 < "$file"`;
+ chomp($sloc);
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+}
+
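A sketch of the interface documented in the usage comment above (file names are illustrative; lexcount1 must be on the PATH, since count_file shells out to it):

    lex_count scanner.l tokens.l                 # per-file counts, then a total
    find . -name '*.l' -print | lex_count -f -   # read the list of files from stdin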
diff --git a/lexcount1.c b/lexcount1.c
new file mode 100644
index 0000000..2056b14
--- /dev/null
+++ b/lexcount1.c
@@ -0,0 +1,58 @@
+
+/* lexcount1 - ignore C comments, count all lines with non-whitespace. */
+/* Read from stdin */
+/* Basically, this is enough machinery to count the physical SLOC for
+ a single file using C comments, e.g., lex. */
+/*
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+*/
+
+#include <stdio.h>
+#include <ctype.h>
+
+int peek() {
+ int c = getchar();
+ ungetc(c, stdin);
+ return c;
+}
+
+int main() {
+ int c;
+ int incomment = 0;
+ long sloc = 0;
+ int nonspace = 0;
+
+ while ( (c = getchar()) != EOF) {
+ if (!incomment) {
+ if ((c == '/') && (peek() == '*')) {incomment=1;}
+ else if (!isspace(c)) {nonspace = 1;}
+ } else {
+ if ((c == '*') && (peek() == '/')) {
+        c = getchar(); incomment=0;  /* consume only the '/' that closes the comment */
+ }
+ }
+    if ((c == '\n') && nonspace) {sloc++; nonspace = 0;}  /* reset the per-line flag */
+ }
+ printf("%ld\n", sloc);
+ return 0; /* Report success. */
+}
+
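lexcount1 itself is a pure filter: it counts a single file supplied on standard input and prints one number, which is why lex_count invokes it once per file. A minimal sketch (the file name is illustrative):

    ./lexcount1 < scanner.l    # prints the physical SLOC count for scanner.l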
diff --git a/linux_unique b/linux_unique
new file mode 100755
index 0000000..160b9bc
--- /dev/null
+++ b/linux_unique
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# Show commands as they're executed.
+set -x
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+echo "Per subdir"
+
+print_sum_subset BUILD/linux/Documentation/
+print_sum_subset BUILD/linux/arch/
+print_sum_subset BUILD/linux/configs/
+print_sum_subset BUILD/linux/drivers/
+print_sum_subset BUILD/linux/fs/
+print_sum_subset BUILD/linux/ibcs/
+print_sum_subset BUILD/linux/include/
+print_sum_subset BUILD/linux/init/
+print_sum_subset BUILD/linux/ipc/
+print_sum_subset BUILD/linux/kernel/
+print_sum_subset BUILD/linux/ksymoops-0.7c/
+print_sum_subset BUILD/linux/lib/
+print_sum_subset BUILD/linux/mm/
+print_sum_subset BUILD/linux/net/
+print_sum_subset BUILD/linux/pcmcia-cs-3.1.8/
+print_sum_subset BUILD/linux/scripts/
+
+echo "i386 vs. non-86"
+
+print_sum_subset BUILD/linux/arch/
+print_sum_subset BUILD/linux/arch/i386
+print_sum_subset linux/drivers/sbus/
+print_sum_subset linux/drivers/macintosh/
+print_sum_subset linux/drivers/sgi/
+print_sum_subset linux/drivers/fc4/
+print_sum_subset linux/drivers/nubus/
+print_sum_subset linux/drivers/acorn/
+print_sum_subset linux/drivers/s390/
+print_sum_subset linux/Documentation/
+print_sum_subset linux/arch
+
+
+
+
diff --git a/lisp_count b/lisp_count
new file mode 100755
index 0000000..ee8d8a5
--- /dev/null
+++ b/lisp_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count ';' $@
+
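lisp_count is a thin wrapper: generic_count receives the comment delimiter (';' for Lisp) followed by the files to count, and the other one-line wrappers in this package (makefile_count below, for example, passes '#') follow the same pattern. A sketch with illustrative file names:

    lisp_count init.el utils.lisp    # same as: generic_count ';' init.el utils.lisp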
diff --git a/make_filelists b/make_filelists
new file mode 100755
index 0000000..5440d50
--- /dev/null
+++ b/make_filelists
@@ -0,0 +1,193 @@
+#!/bin/sh
+
+# On the command line, list the source code directories, e.g.:
+# /usr/src/redhat/BUILD/*
+# This command creates a set of directories paralleling the source code
+# directories, with a file named "filelist" listing all the files.
+
+# This script goes through some trouble to turn all relative references
+# into absolute pathnames, to make sure that the intended files
+# are always referenced.  The current directory may not be the data
+# directory, and the parameters given may use relative addressing;
+# we need to resolve all of that here.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+
+if [ "$#" -eq 0 ]
+then
+ echo "Error: You must provide a list of directories."
+ exit 1
+fi
+
+
+follow=""
+skip=""
+prefix=""
+startingdir=`pwd`
+datadir=`pwd`
+
+while [ "$#" -gt 0 ]
+do
+ case "$1"
+ in
+ --follow) follow="-follow"
+ shift;;
+ --datadir) shift
+ if [ ! -d "$1" ]
+ then
+ echo "Error: $1 is not a directory"
+ exit 1
+ fi
+ cd "$1"
+ datadir=`pwd`
+ cd "$startingdir"
+ shift;;
+ --skip) shift
+ skip="$1"
+ shift;;
+ --prefix) shift
+ prefix="$1"
+ shift;;
+ --) shift; break;;
+ --*) echo "Error: unrecognized option $1"
+ exit 1
+ shift ;;
+ *) break;;
+ esac
+done
+
+# Non-directories will be placed into the "top_dir" data directory:
+toplevel_name="${prefix}top_dir"
+
+for possible_dir
+do
+ # Reset to starting directory each time, so that relative directory
+ # requests will be processed correctly.
+ cd "$startingdir"
+
+ # Translate "." into the name of current directory.
+ # We have to handle "." and ".." specially, because we can't place
+ # files with these names into the data directory.
+ if [ "$possible_dir" = "." ]
+ then
+ possible_dir=`pwd`
+ fi
+ if [ "$possible_dir" = ".." ]
+ then
+ cd ..
+ possible_dir=`pwd`
+ # Reset current directory.
+ cd "$startingdir"
+ fi
+
+ base=`basename "$possible_dir"`
+ if [ "$base" = "$skip" ]
+ then
+ continue
+ fi
+
+ if [ -d "$possible_dir" ]
+ then
+ # Set "dir" to real name (if possible_dir is a symlink to another
+ # directory, then "dir" and "possible_dir" may have very different values)
+ # depending on how "cd" is implemented on your shell.
+ cd "$possible_dir"
+ dir=`pwd`
+
+ # The child directory's name is derived from possible_dir, not dir --
+ # that way, directories we create will have names based on the supplied
+ # name (potentially a link), not the linked-to directory's name.
+ # Thus, symlinks can be used to disambiguate names where necessary.
+ childname="${prefix}${base}"
+
+ cd "$datadir"
+ if [ -d "$childname" ]
+ then
+ echo "WARNING! Directory $childname pre-existed when adding $possible_dir"
+ else
+ mkdir "$childname"
+ fi
+
+ echo "Creating filelist for $childname"
+ find "$dir" $follow -type f -print > "${childname}/filelist"
+
+ # If it exists, copy the PROGRAM_LICENSE.
+ if [ -s "${dir}/PROGRAM_LICENSE" ]
+ then
+ cp "${dir}/PROGRAM_LICENSE" "${childname}/PROGRAM_LICENSE"
+ fi
+ # If it exists, copy the ORIGINAL_SPEC_FILE
+ if [ -s "${dir}/ORIGINAL_SPEC_FILE" ]
+ then
+ cp "${dir}/ORIGINAL_SPEC_FILE" "${childname}/ORIGINAL_SPEC_FILE"
+ fi
+
+ # Do some error-checking.
+ if [ ! -s "${childname}/filelist" ]
+ then
+ # This is inefficient, but it doesn't matter - it's only used
+ # when we have an empty filelist (which is often an error condition)
+ saw_a_file=n
+      for x in `ls "$dir"`
+ do
+ saw_a_file=y
+ break
+ done
+ case $saw_a_file
+ in
+ n)
+ echo "Warning: directory ${childname} got no files."
+ echo "You may need to use the --follow option.";;
+ esac
+ fi
+
+ elif [ -f "$possible_dir" ]
+ then
+ # We have a non-directory (regular file, symlink to a file, etc.).
+ # We'll just add an absolute path to it into the toplevel_name directory.
+
+ # First, convert possible_dir into an absolute pathname if necessary:
+ pathname="$possible_dir"
+ case "$pathname"
+ in
+ /*) ;; # Already absolute pathname - do nothing.
+ *) pathname="${startingdir}/${possible_dir}" ;;
+ esac
+
+ # Add it to the toplevel_name directory (creating the directory if needed)
+ cd "$datadir"
+ if [ ! -d "$toplevel_name" ]
+ then
+ echo "Have a non-directory at the top, so creating directory $toplevel_name"
+ mkdir "$toplevel_name"
+ fi
+ echo "Adding $pathname to $toplevel_name"
+ echo "$pathname" >> "${toplevel_name}/filelist"
+ else
+ echo "WARNING!!! Not a file nor a directory (so ignored): $possible_dir"
+ fi
+done
+exit 0
+
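A hedged example session, reusing the header comment's own sample paths (the data directory and prefix are illustrative):

    mkdir ~/sloc_data && cd ~/sloc_data
    make_filelists --follow --prefix rh_ /usr/src/redhat/BUILD/*
    # Each unpacked BUILD subdirectory now has a matching rh_<name>/ directory
    # here containing a "filelist" of every file found beneath it; --follow
    # makes find traverse symbolic links as well.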
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..0c029f1
--- /dev/null
+++ b/makefile
@@ -0,0 +1,246 @@
+# Makefile for SLOCCount.
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+# My thanks to John Clezy, who provided the modifications to this makefile
+# to make sloccount work on Windows using Cygwin.
+
+# You may need to change the following options to install on your machine:
+
+# Set this for where to store the man pages and executables.
+# If you want to store this as part of an official distribution,
+# change this to "/usr":
+PREFIX=/usr/local
+
+# Set "EXE_SUFFIX" to ".exe" if you're running on Windows, like this:
+# EXE_SUFFIX=.exe
+EXE_SUFFIX=
+
+# Set this to your C compiler, if it's not "gcc"; a likely alternative is "cc".
+# The "-Wall" option turns on warnings in gcc. gcc users might also want
+# to consider using "-Werror", which turns warnings into errors.
+CC=gcc -Wall
+
+# Set this to the name of your "install" program. On some systems,
+# "install -C" would be useful (so unchanged files won't be modified),
+# but not all systems support this install option. "Install" should work
+# for any Unix-like system as well as for Cygwin.
+# "INSTALL_A_DIR" is the command to create a directory in the first place.
+INSTALL=install
+INSTALL_A_DIR=$(INSTALL) -d
+
+# Set this to the name of the program to create RPMs.
+# This works for Red Hat Linux 8.0:
+RPMBUILD=rpmbuild -ba
+# This works for Red Hat Linux 7.X and below:
+# RPMBUILD=rpm -ba
+
+
+# From here on, nothing should need changing unless you're
+# changing the code itself.
+
+# To change the version #, change VERSION here, sloccount.spec,
+# sloccount, and sloccount.html.
+# Note to self: to redistribute, do this:
+# make distribute; su; make rpm; (test as desired);
+# rpm -e sloccount; ^D; make my_install; send to web site.
+
+
+NAME=sloccount
+VERSION=2.26
+RPM_VERSION=1
+ARCH=i386
+VERSIONEDNAME=$(NAME)-$(VERSION)
+INSTALL_DIR=$(PREFIX)/bin
+MAN_DIR=$(PREFIX)/share/man
+MAN_DIR_MAN1=$(MAN_DIR)/man1
+DOC_DIR=$(PREFIX)/share/doc/$(VERSIONEDNAME)-$(RPM_VERSION)
+POSTED_DIR=/home/dwheeler/dwheeler.com/sloccount
+
+COMPILED_EXECUTABLES= \
+ c_count$(EXE_SUFFIX) \
+ java_count$(EXE_SUFFIX) \
+ lexcount1$(EXE_SUFFIX) \
+ pascal_count$(EXE_SUFFIX) \
+ php_count$(EXE_SUFFIX) \
+ jsp_count$(EXE_SUFFIX) \
+ ml_count$(EXE_SUFFIX)
+
+EXECUTABLES= \
+ ada_count \
+ asm_count \
+ awk_count \
+ break_filelist \
+ cobol_count \
+ compute_all \
+ compute_sloc_lang \
+ count_extensions \
+ count_unknown_ext \
+ csh_count \
+ exp_count \
+ fortran_count \
+ f90_count \
+ generic_count \
+ get_sloc \
+ get_sloc_details \
+ haskell_count \
+ lex_count \
+ lisp_count \
+ make_filelists \
+ makefile_count \
+ modula3_count \
+ objc_count \
+ perl_count \
+ print_sum \
+ python_count \
+ ruby_count \
+ sed_count \
+ sh_count \
+ show_filecount \
+ sloccount \
+ sql_count \
+ tcl_count \
+ $(COMPILED_EXECUTABLES)
+
+MANPAGES=sloccount.1.gz
+
+MYDOCS=sloccount.html README TODO ChangeLog
+
+
+all: $(COMPILED_EXECUTABLES)
+
+lexcount1$(EXE_SUFFIX): lexcount1.c
+ $(CC) lexcount1.c -o lexcount1$(EXE_SUFFIX)
+
+c_count$(EXE_SUFFIX): c_count.c
+ $(CC) c_count.c -o c_count$(EXE_SUFFIX)
+
+php_count$(EXE_SUFFIX): php_count.c
+ $(CC) php_count.c -o php_count$(EXE_SUFFIX)
+
+pascal_count.c: pascal_count.l driver.c driver.h
+ flex -Cfe -t pascal_count.l > pascal_count.c
+
+pascal_count$(EXE_SUFFIX): pascal_count.c
+ $(CC) pascal_count.c -o pascal_count$(EXE_SUFFIX)
+
+jsp_count.c: jsp_count.l driver.c driver.h
+ flex -Cfe -t jsp_count.l > jsp_count.c
+
+jsp_count$(EXE_SUFFIX): jsp_count.c
+ $(CC) jsp_count.c -o jsp_count$(EXE_SUFFIX)
+
+ml_count$(EXE_SUFFIX): ml_count.c
+ $(CC) ml_count.c -o ml_count$(EXE_SUFFIX)
+
+sloccount.1.gz: sloccount.1
+ gzip -c sloccount.1 > sloccount.1.gz
+
+# Currently "java_count" is the same as "c_count":
+java_count$(EXE_SUFFIX): c_count$(EXE_SUFFIX)
+ cp -p c_count$(EXE_SUFFIX) java_count$(EXE_SUFFIX)
+
+# This is USC's code counter, not built by default:
+c_lines: C_LINES.C
+ $(CC) C_LINES.C -o c_lines$(EXE_SUFFIX)
+
+
+install_programs: all
+ $(INSTALL) $(EXECUTABLES) $(INSTALL_DIR)
+
+uninstall_programs:
+ cd $(INSTALL_DIR) && rm -f $(EXECUTABLES)
+
+install_man: $(MANPAGES)
+ $(INSTALL_A_DIR) $(MAN_DIR_MAN1)
+ $(INSTALL) $(MANPAGES) $(MAN_DIR_MAN1)
+
+uninstall_man:
+ cd $(MAN_DIR_MAN1) && rm -f $(MANPAGES)
+
+install_docs: install_man
+ $(INSTALL_A_DIR) $(DOC_DIR)
+ $(INSTALL) $(MYDOCS) $(DOC_DIR)
+
+uninstall_docs:
+ rm -fr $(DOC_DIR)
+
+
+install: install_programs install_man install_docs
+
+uninstall: uninstall_programs uninstall_docs uninstall_man
+
+
+clean:
+ -rm -f $(COMPILED_EXECUTABLES) core sloccount.1.gz
+
+phptest: php_count
+ ./php_count *.php
+ ./php_count /usr/share/php/*.php
+ ./php_count /usr/share/php/*/*.php
+
+# "make distribute" creates the tarball.
+
+
+distribute: clean $(MANPAGES)
+ rm -f sloccount-$(VERSION).tgz
+ rm -f sloccount-$(VERSION).tar.gz
+ mkdir 9temp
+ cp -pr [A-Za-z]* 9temp
+ mv 9temp $(VERSIONEDNAME)
+ rm -f $(VERSIONEDNAME)/*.tgz
+ rm -f $(VERSIONEDNAME)/*.tar.gz
+ rm -f $(VERSIONEDNAME)/*.rpm
+# rm -f $(VERSIONEDNAME)/*.1.gz
+ rm -f $(VERSIONEDNAME)/C_LINES.C
+ rm -f $(VERSIONEDNAME)/java_lines.c
+ rm -f $(VERSIONEDNAME)/c_lines
+ tar -cvf - $(VERSIONEDNAME)/* | \
+ gzip --best > $(VERSIONEDNAME).tar.gz
+ chown --reference=README $(VERSIONEDNAME).tar.gz
+ chmod a+rX *
+ rm -fr $(VERSIONEDNAME)
+
+my_install: distribute
+ chmod a+rX *
+ cp -p sloccount-$(VERSION).tar.gz $(POSTED_DIR)
+ cp -p sloccount.html $(POSTED_DIR)
+ cp -p ChangeLog $(POSTED_DIR)
+ cp -p TODO $(POSTED_DIR)
+ cp -p /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm $(POSTED_DIR)
+ cp -p /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm $(POSTED_DIR)
+
+rpm: distribute
+ cp $(VERSIONEDNAME).tar.gz /usr/src/redhat/SOURCES
+ cp sloccount.spec /usr/src/redhat/SPECS
+ cd /usr/src/redhat/SPECS
+ # Uninstall current sloccount if any; ignore errors if not installed.
+ -rpm -e sloccount
+ $(RPMBUILD) sloccount.spec
+ chmod a+r /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm
+ chmod a+r /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm
+ rpm -ivh /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm
+ echo "Use rpm -e $(NAME) to remove the package"
+
+test: all
+ PATH=.:${PATH}; sloccount testcode
+
+
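A sketch of a typical build, test, and install cycle using the targets defined above (install writes under PREFIX, /usr/local by default, so it normally needs root; PREFIX can also be overridden on the make command line):

    make                   # build the compiled counters (c_count, jsp_count, ...)
    make test              # run sloccount against the bundled testcode/ tree
    su -c 'make install'
    su -c 'make uninstall'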
diff --git a/makefile.orig b/makefile.orig
new file mode 100644
index 0000000..e2753d8
--- /dev/null
+++ b/makefile.orig
@@ -0,0 +1,222 @@
+# Makefile for SLOCCount.
+# (C) Copyright David A. Wheeler 2000-2002.
+# Licensed under the GPL; see sloccount.html for license information.
+# My thanks to John Clezy, who provided the modifications to this makefile
+# to make sloccount work on Windows using Cygwin.
+
+
+# You may need to change the following options to install on your machine:
+
+# Set this for where to store the man pages and executables.
+# If you want to store this as part of an official distribution,
+# change this to "/usr":
+PREFIX=/usr/local
+
+# Set "EXE_SUFFIX" to ".exe" if you're running on Windows, like this:
+# EXE_SUFFIX=.exe
+EXE_SUFFIX=
+
+# Set this to your C compiler, if it's not "gcc"; a likely alternative is "cc".
+# The "-Wall" option turns on warnings in gcc. gcc users might also want
+# to consider using "-Werror", which turns warnings into errors.
+CC=gcc -Wall
+
+# Set this to the name of your "install" program. On some systems,
+# "install -C" would be useful (so unchanged files won't be modified),
+# but not all systems support this install option. "Install" should work
+# for any Unix-like system as well as for Cygwin.
+# "INSTALL_A_DIR" is the command to create a directory in the first place.
+INSTALL=install
+INSTALL_A_DIR=$(INSTALL) -d
+
+# Set this to the name of the program to create RPMs.
+# This works for Red Hat Linux 8.0:
+RPMBUILD=rpmbuild -ba
+# This works for Red Hat Linux 7.X and below:
+# RPMBUILD=rpm -ba
+
+
+# From here on, nothing should need changing unless you're
+# changing the code itself.
+
+# To change the version #, change VERSION here, sloccount.spec, and
+# sloccount.html.
+# Note to self: to redistribute, do this:
+# make distribute; su; make rpm; (test as desired);
+# rpm -e sloccount; ^D; make my_install; send to web site.
+
+
+NAME=sloccount
+VERSION=2.20
+RPM_VERSION=1
+ARCH=i386
+VERSIONEDNAME=$(NAME)-$(VERSION)
+INSTALL_DIR=$(PREFIX)/bin
+MAN_DIR=$(PREFIX)/share/man
+MAN_DIR_MAN1=$(MAN_DIR)/man1
+DOC_DIR=$(PREFIX)/share/doc/$(VERSIONEDNAME)-$(RPM_VERSION)
+POSTED_DIR=/home/dwheeler/dwheeler.com/sloccount
+
+COMPILED_EXECUTABLES= \
+ c_count$(EXE_SUFFIX) \
+ java_count$(EXE_SUFFIX) \
+ lexcount1$(EXE_SUFFIX) \
+ pascal_count$(EXE_SUFFIX) \
+ php_count$(EXE_SUFFIX) \
+ jsp_count$(EXE_SUFFIX)
+
+EXECUTABLES= \
+ ada_count \
+ asm_count \
+ awk_count \
+ break_filelist \
+ cobol_count \
+ compute_all \
+ compute_sloc_lang \
+ count_extensions \
+ count_unknown_ext \
+ csh_count \
+ exp_count \
+ fortran_count \
+ generic_count \
+ get_sloc \
+ get_sloc_details \
+ haskell_count \
+ lex_count \
+ lisp_count \
+ make_filelists \
+ makefile_count \
+ ml_count \
+ modula3_count \
+ objc_count \
+ perl_count \
+ print_sum \
+ python_count \
+ ruby_count \
+ sed_count \
+ sh_count \
+ show_filecount \
+ sloccount \
+ sql_count \
+ tcl_count \
+ $(COMPILED_EXECUTABLES)
+
+MANPAGES=sloccount.1.gz
+
+MYDOCS=sloccount.html README TODO ChangeLog
+
+
+all: $(COMPILED_EXECUTABLES)
+
+lexcount1$(EXE_SUFFIX): lexcount1.c
+ $(CC) lexcount1.c -o lexcount1$(EXE_SUFFIX)
+
+c_count$(EXE_SUFFIX): c_count.c
+ $(CC) c_count.c -o c_count$(EXE_SUFFIX)
+
+php_count$(EXE_SUFFIX): php_count.c
+ $(CC) php_count.c -o php_count$(EXE_SUFFIX)
+
+pascal_count.c: pascal_count.l driver.c driver.h
+ flex -Cfe -t pascal_count.l > pascal_count.c
+
+pascal_count$(EXE_SUFFIX): pascal_count.c
+ $(CC) pascal_count.c -o pascal_count$(EXE_SUFFIX)
+
+jsp_count.c: jsp_count.l driver.c driver.h
+ flex -Cfe -t jsp_count.l > jsp_count.c
+
+jsp_count$(EXE_SUFFIX): jsp_count.c
+ $(CC) jsp_count.c -o jsp_count$(EXE_SUFFIX)
+
+sloccount.1.gz: sloccount.1
+ gzip -c sloccount.1 > sloccount.1.gz
+
+# Currently "java_count" is the same as "c_count":
+java_count$(EXE_SUFFIX): c_count$(EXE_SUFFIX)
+ cp -p c_count$(EXE_SUFFIX) java_count$(EXE_SUFFIX)
+
+# This is USC's code counter, not built by default:
+c_lines: C_LINES.C
+ $(CC) C_LINES.C -o c_lines$(EXE_SUFFIX)
+
+
+install_programs: all
+ $(INSTALL) $(EXECUTABLES) $(INSTALL_DIR)
+
+uninstall_programs:
+ cd $(INSTALL_DIR) && rm -f $(EXECUTABLES)
+
+install_man:
+ $(INSTALL_A_DIR) $(MAN_DIR_MAN1)
+ $(INSTALL) $(MANPAGES) $(MAN_DIR_MAN1)
+
+uninstall_man:
+ cd $(MAN_DIR_MAN1) && rm -f $(MANPAGES)
+
+install_docs: install_man
+ $(INSTALL_A_DIR) $(DOC_DIR)
+ $(INSTALL) $(MYDOCS) $(DOC_DIR)
+
+uninstall_docs:
+ cd $(DOC_DIR) && rm -f $(MYDOCS) && rmdir $(DOC_DIR)
+
+
+install: install_programs install_man install_docs
+
+uninstall: uninstall_programs uninstall_docs uninstall_man
+
+
+clean:
+ -rm -f $(COMPILED_EXECUTABLES) core sloccount.1.gz
+
+phptest: php_count
+ ./php_count *.php
+ ./php_count /usr/share/php/*.php
+ ./php_count /usr/share/php/*/*.php
+
+# "make distribute" creates the tarball.
+
+
+distribute: clean $(MANPAGES)
+ rm -f sloccount-$(VERSION).tgz
+ rm -f sloccount-$(VERSION).tar.gz
+ mkdir 9temp
+ cp -pr [A-Za-z]* 9temp
+ mv 9temp $(VERSIONEDNAME)
+ rm -f $(VERSIONEDNAME)/*.tgz
+ rm -f $(VERSIONEDNAME)/*.tar.gz
+ rm -f $(VERSIONEDNAME)/*.rpm
+# rm -f $(VERSIONEDNAME)/*.1.gz
+ rm -f $(VERSIONEDNAME)/C_LINES.C
+ rm -f $(VERSIONEDNAME)/java_lines.c
+ rm -f $(VERSIONEDNAME)/c_lines
+ tar -cvf - $(VERSIONEDNAME)/* | \
+ gzip --best > $(VERSIONEDNAME).tar.gz
+ chown --reference=README $(VERSIONEDNAME).tar.gz
+ chmod a+rX *
+ rm -fr $(VERSIONEDNAME)
+
+my_install: distribute
+ chmod a+rX *
+ cp -p sloccount-$(VERSION).tar.gz $(POSTED_DIR)
+ cp -p sloccount.html $(POSTED_DIR)
+ cp -p ChangeLog $(POSTED_DIR)
+ cp -p TODO $(POSTED_DIR)
+ cp -p /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm $(POSTED_DIR)
+ cp -p /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm $(POSTED_DIR)
+
+
+rpm: distribute
+ cp $(VERSIONEDNAME).tar.gz /usr/src/redhat/SOURCES
+ cp sloccount.spec /usr/src/redhat/SPECS
+ cd /usr/src/redhat/SPECS
+ # Uninstall current sloccount if any; ignore errors if not installed.
+ -rpm -e sloccount
+ $(RPMBUILD) sloccount.spec
+ chmod a+r /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm
+ chmod a+r /usr/src/redhat/SRPMS/$(VERSIONEDNAME)-$(RPM_VERSION)*.src.rpm
+ rpm -ivh /usr/src/redhat/RPMS/$(ARCH)/$(VERSIONEDNAME)-$(RPM_VERSION)*.rpm
+ echo "Use rpm -e $(NAME) to remove the package"
+
+
diff --git a/makefile_count b/makefile_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/makefile_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' $@
+
diff --git a/ml_count.c b/ml_count.c
new file mode 100644
index 0000000..dc18f35
--- /dev/null
+++ b/ml_count.c
@@ -0,0 +1,209 @@
+/* ml_count: given a list of ML files on the command line,
+ count the SLOC in each one. SLOC = physical, non-comment lines.
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler and Michal Moskal
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+Michal Moskal may be contacted at malekith at pld-linux.org.
+
+ Based on c_count.c by:
+ (C) Copyright 2000 David A. Wheeler
+  Michal Moskal rewrote the sloc_count() function to support ML.
+
+ Usage: Use in one of the following ways:
+ ml_count # As filter
+ ml_count [-f file] [list_of_files]
+ file: file with a list of files to count (if "-", read list from stdin)
+ list_of_files: list of files to count
+
+ Michal Moskal states "It was easier to get string escaping and comment
+  nesting right in C than in Perl. It would be even easier in OCaml... ;-)"
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/* Globals */
+long total_sloc;
+
+int peek(FILE *stream) {
+ int c = getc(stream);
+ ungetc(c, stream);
+ return c;
+}
+
+int ispeek(int c, FILE *stream) {
+ if (c == peek(stream)) {return 1;}
+ return 0;
+}
+
+long line_number;
+
+int getachar(FILE *stream) {
+/* Like getchar(), but keep track of line number. */
+ static int last_char_was_newline = 0;
+ int c;
+
+ c = getc(stream);
+ if (last_char_was_newline) line_number++;
+ if (c == '\n') last_char_was_newline=1;
+ else last_char_was_newline=0;
+ return c;
+}
+
+
+long sloc_count(char *filename, FILE *stream) {
+ /* Count the sloc in the program in stdin. */
+ long sloc = 0;
+
+ int sawchar = 0; /* Did you see a character on this line? */
+ int c;
+
+ int comment_lev = 0; /* Level of comment nesting. */
+ int in_string = 0; /* 0 or 1 */
+
+
+ while ((c = getachar(stream)) != EOF) {
+ switch (c) {
+ case '"':
+ in_string = !in_string;
+ break;
+
+ case '(':
+ if (!in_string && ispeek('*', stream)) {
+ comment_lev++;
+ getachar(stream); /* skip '*' */
+ }
+ break;
+
+ case '*':
+ if (comment_lev && !in_string && ispeek(')', stream)) {
+ comment_lev--;
+ getachar(stream); /* skip ')' */
+ continue /* while */;
+ }
+ break;
+
+ case '\\':
+ /* Ignore next character if in string. But don't ignore newlines. */
+ if (in_string && !ispeek('\n', stream))
+ getachar(stream);
+ break;
+
+ case ' ':
+ case '\t':
+ /* just ignore blanks */
+ continue /* while */;
+
+ case '\n':
+ if (sawchar) {
+ sloc++;
+ sawchar = 0;
+ }
+ continue /* while */;
+
+ default:
+ break;
+ }
+
+ if (comment_lev == 0)
+ sawchar = 1;
+ }
+
+ /* We're done with the file. Handle EOF-without-EOL. */
+ if (sawchar) sloc++;
+
+ if (comment_lev) {
+ fprintf(stderr, "ml_count ERROR - terminated in comment in %s\n", filename);
+ } else if (in_string) {
+ fprintf(stderr, "ml_count ERROR - terminated in string in %s\n", filename);
+ }
+
+ return sloc;
+}
+
+
+void count_file(char *filename) {
+ long sloc;
+ FILE *stream;
+
+ stream = fopen(filename, "r");
+ line_number = 1;
+ sloc = sloc_count(filename, stream);
+ total_sloc += sloc;
+ printf("%ld %s\n", sloc, filename);
+ fclose(stream);
+}
+
+char *read_a_line(FILE *file) {
+ /* Read a line in, and return a malloc'ed buffer with the line contents.
+ Any newline at the end is stripped.
+ If there's nothing left to read, returns NULL. */
+
+ /* We'll create a monstrously long buffer to make life easy for us: */
+ char buffer[10000];
+ char *returnval;
+ char *newlinepos;
+
+ returnval = fgets(buffer, sizeof(buffer), file);
+ if (returnval) {
+ newlinepos = buffer + strlen(buffer) - 1;
+ if (*newlinepos == '\n') {*newlinepos = '\0';};
+ return strdup(buffer);
+ } else {
+ return NULL;
+ }
+}
+
+
+int main(int argc, char *argv[]) {
+ long sloc;
+ int i;
+ FILE *file_list;
+ char *s;
+
+ total_sloc = 0;
+ line_number = 1;
+
+ if (argc <= 1) {
+ sloc = sloc_count("-", stdin);
+ printf("%ld %s\n", sloc, "-");
+ total_sloc += sloc;
+ } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) {
+ if (!strcmp (argv[2], "-")) {
+ file_list = stdin;
+ } else {
+ file_list = fopen(argv[2], "r");
+ }
+ if (file_list) {
+ while ((s = read_a_line(file_list))) {
+ count_file(s);
+ free(s);
+ }
+ }
+ } else {
+ for (i=1; i < argc; i++) { count_file(argv[i]); }
+ }
+ printf("Total:\n");
+ printf("%ld\n", total_sloc);
+ return 0; /* Report success */
+}
+
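The three invocation styles from the usage comment above, with illustrative file names:

    ml_count lexer.ml parser.mli           # per-file counts, then a total
    ml_count < lexer.ml                    # filter mode: count stdin, reported as "-"
    find . -name '*.ml' | ml_count -f -    # read the list of files from stdin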
diff --git a/modula3_count b/modula3_count
new file mode 100644
index 0000000..fa2921d
--- /dev/null
+++ b/modula3_count
@@ -0,0 +1,65 @@
+#!/usr/bin/perl -w
+# modula3_count - count physical lines of code
+# Strips out (* .. *) and counts the rest.
+# Usage: modula3_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+ my $incomment = 0;
+
+ open (FILE, $file);
+ while (<FILE>) {
+ if ($incomment) {
+ if (m/\*\)/) { s/^.*?\*\)//; $incomment = 0;}
+ else { s/.*//; }
+ }
+ if (!$incomment) {
+ s!\(\*.*?\*\)!!g;
+ if (m/\(\*/) {
+ s/\(\*.*//;
+ $incomment = 1;
+ }
+ }
+ if (m/\S/) {$sloc++;}
+ }
+ print "$sloc $file\n";
+ if ($incomment) {print "ERROR: ended in comment in $file\n";}
+ $total_sloc += $sloc;
+ $sloc = 0;
+ $incomment = 0;
+ close (FILE);
+}
diff --git a/objc_count b/objc_count
new file mode 100755
index 0000000..a74bd5b
--- /dev/null
+++ b/objc_count
@@ -0,0 +1,89 @@
+#!/usr/bin/perl -w
+# objc_count - count physical lines of code
+# Strips out /* .. */ and counts the rest.
+# Usage: objc_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+ my $incomment = 0;
+
+ open (FILE, $file);
+ while (<FILE>) {
+ if ($incomment) {
+ if (m/\*\//) { s/^.*?\*\///; $incomment = 0;}
+ else { s/.*//; }
+ }
+ if (!$incomment) {
+ # s/\/\*.*?\*\//g;
+ s!\/\*.*?\*\/!!g;
+ if (m/\/\*/) {
+ s/\/\*.*//;
+ $incomment = 1;
+ }
+ }
+ if (m/\S/) {$sloc++;}
+ }
+ print "$sloc $file\n";
+ if ($incomment) {print "ERROR: ended in comment in $file\n";}
+ $total_sloc += $sloc;
+ $sloc = 0;
+ $incomment = 0;
+ close (FILE);
+}
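A worked trace of the stripping loop above, on an illustrative four-line Objective-C fragment (not taken from the SLOCCount documentation):

    line 1: int x; /* start of a             -> "/*..." stripped, "int x;" remains: counted
    line 2:   comment that ends */ int y;    -> text after "*/" remains: counted
    line 3: /* comment-only line */          -> nothing left after stripping: not counted
    line 4: int z;                           -> counted

objc_count would therefore report 3 SLOC for this fragment.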
diff --git a/pascal_count.c b/pascal_count.c
new file mode 100644
index 0000000..e7f870f
--- /dev/null
+++ b/pascal_count.c
@@ -0,0 +1,1714 @@
+/* A lexical scanner generated by flex */
+
+/* Scanner skeleton version:
+ * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $
+ */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+
+#include <stdio.h>
+#include <unistd.h>
+
+
+/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
+#ifdef c_plusplus
+#ifndef __cplusplus
+#define __cplusplus
+#endif
+#endif
+
+
+#ifdef __cplusplus
+
+#include <stdlib.h>
+
+/* Use prototypes in function declarations. */
+#define YY_USE_PROTOS
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+#if __STDC__
+
+#define YY_USE_PROTOS
+#define YY_USE_CONST
+
+#endif /* __STDC__ */
+#endif /* ! __cplusplus */
+
+#ifdef __TURBOC__
+ #pragma warn -rch
+ #pragma warn -use
+#include <io.h>
+#include <stdlib.h>
+#define YY_USE_CONST
+#define YY_USE_PROTOS
+#endif
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+
+#ifdef YY_USE_PROTOS
+#define YY_PROTO(proto) proto
+#else
+#define YY_PROTO(proto) ()
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index. If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN yy_start = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START ((yy_start - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart( yyin )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#define YY_BUF_SIZE 16384
+
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+
+extern int yyleng;
+extern FILE *yyin, *yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+/* The funky do-while in the following #define is used to turn the definition
+ * int a single C statement (which needs a semi-colon terminator). This
+ * avoids problems with code like:
+ *
+ * if ( condition_holds )
+ * yyless( 5 );
+ * else
+ * do_something_else();
+ *
+ * Prior to using the do-while the compiler would get upset at the
+ * "else" because it interpreted the "if" statement as being all
+ * done when it reached the ';' after the yyless() call.
+ */
+
+/* Return all but the first 'n' matched characters back to the input stream. */
+
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ *yy_cp = yy_hold_char; \
+ YY_RESTORE_YY_MORE_OFFSET \
+ yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+
+#define unput(c) yyunput( c, yytext_ptr )
+
+/* The following is because we cannot portably get our hands on size_t
+ * (without autoconf's help, which isn't available because we want
+ * flex-generated scanners to compile on their own).
+ */
+typedef unsigned int yy_size_t;
+
+
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+ };
+
+static YY_BUFFER_STATE yy_current_buffer = 0;
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ */
+#define YY_CURRENT_BUFFER yy_current_buffer
+
+
+/* yy_hold_char holds the character lost when yytext is formed. */
+static char yy_hold_char;
+
+static int yy_n_chars; /* number of characters read into yy_ch_buf */
+
+
+int yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 1; /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow yywrap()'s to do buffer switches
+ * instead of setting up a fresh yyin. A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void yyrestart YY_PROTO(( FILE *input_file ));
+
+void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer ));
+void yy_load_buffer_state YY_PROTO(( void ));
+YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size ));
+void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b ));
+void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file ));
+void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b ));
+#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer )
+
+YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size ));
+YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str ));
+YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len ));
+
+static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
+static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
+static void yy_flex_free YY_PROTO(( void * ));
+
+#define yy_new_buffer yy_create_buffer
+
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! yy_current_buffer ) \
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
+ yy_current_buffer->yy_is_interactive = is_interactive; \
+ }
+
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! yy_current_buffer ) \
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
+ yy_current_buffer->yy_at_bol = at_bol; \
+ }
+
+#define YY_AT_BOL() (yy_current_buffer->yy_at_bol)
+
+
+#define yywrap() 1
+#define YY_SKIP_YYWRAP
+typedef unsigned char YY_CHAR;
+FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+typedef int yy_state_type;
+extern char *yytext;
+#define yytext_ptr yytext
+static yyconst short yy_nxt[][10] =
+ {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ },
+
+ {
+ 9, 10, 11, 12, 13, 14, 10, 10, 15, 10
+ },
+
+ {
+ 9, 10, 11, 12, 13, 14, 10, 10, 15, 10
+ },
+
+ {
+ 9, 16, 16, 17, 16, 16, 16, 18, 16, 16
+ },
+
+ {
+ 9, 16, 16, 17, 16, 16, 16, 18, 16, 16
+ },
+
+ {
+ 9, 19, 19, 20, 19, 19, 19, 19, 19, 21
+ },
+
+ {
+ 9, 19, 19, 20, 19, 19, 19, 19, 19, 21
+ },
+
+ {
+ 9, 22, 22, 23, 24, 22, 22, 22, 22, 22
+ },
+
+ {
+ 9, 22, 22, 23, 24, 22, 22, 22, 22, 22
+ },
+
+ {
+ -9, -9, -9, -9, -9, -9, -9, -9, -9, -9
+
+ },
+
+ {
+ 9, 25, 25, -10, -10, -10, 25, 25, -10, 25
+ },
+
+ {
+ 9, -11, -11, -11, -11, -11, -11, -11, -11, -11
+ },
+
+ {
+ 9, -12, -12, -12, -12, -12, -12, -12, -12, -12
+ },
+
+ {
+ 9, -13, -13, -13, -13, -13, -13, -13, -13, -13
+ },
+
+ {
+ 9, -14, -14, -14, -14, -14, -14, 26, -14, -14
+ },
+
+ {
+ 9, -15, -15, -15, -15, -15, -15, -15, -15, -15
+ },
+
+ {
+ 9, 27, 27, 28, 27, 27, 27, -16, 27, 27
+ },
+
+ {
+ 9, -17, -17, -17, -17, -17, -17, -17, -17, -17
+ },
+
+ {
+ 9, 29, 29, 30, 29, 29, 31, 32, 29, 29
+ },
+
+ {
+ 9, 33, 33, 34, 33, 33, 33, 33, 33, -19
+
+ },
+
+ {
+ 9, -20, -20, -20, -20, -20, -20, -20, -20, -20
+ },
+
+ {
+ 9, -21, -21, -21, -21, -21, -21, -21, -21, -21
+ },
+
+ {
+ 9, 35, 35, -22, -22, 35, 35, 35, 35, 35
+ },
+
+ {
+ 9, -23, -23, -23, -23, -23, -23, -23, -23, -23
+ },
+
+ {
+ 9, -24, -24, -24, 36, -24, -24, -24, -24, -24
+ },
+
+ {
+ 9, 25, 25, -25, -25, -25, 25, 25, -25, 25
+ },
+
+ {
+ 9, -26, -26, -26, -26, -26, -26, -26, -26, -26
+ },
+
+ {
+ 9, 27, 27, 28, 27, 27, 27, -27, 27, 27
+ },
+
+ {
+ 9, -28, -28, -28, -28, -28, -28, -28, -28, -28
+ },
+
+ {
+ 9, 29, 29, 30, 29, 29, -29, -29, 29, 29
+
+ },
+
+ {
+ 9, -30, -30, -30, -30, -30, -30, -30, -30, -30
+ },
+
+ {
+ 9, -31, -31, -31, -31, -31, -31, -31, -31, -31
+ },
+
+ {
+ 9, 29, 29, 30, 29, 29, 31, 32, 29, 29
+ },
+
+ {
+ 9, 33, 33, 34, 33, 33, 33, 33, 33, -33
+ },
+
+ {
+ 9, -34, -34, -34, -34, -34, -34, -34, -34, -34
+ },
+
+ {
+ 9, 35, 35, -35, -35, 35, 35, 35, 35, 35
+ },
+
+ {
+ 9, -36, -36, -36, -36, -36, -36, -36, -36, -36
+ },
+
+ } ;
+
+
+static yy_state_type yy_get_previous_state YY_PROTO(( void ));
+static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state ));
+static int yy_get_next_buffer YY_PROTO(( void ));
+static void yy_fatal_error YY_PROTO(( yyconst char msg[] ));
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ yytext_ptr = yy_bp; \
+ yyleng = (int) (yy_cp - yy_bp); \
+ yy_hold_char = *yy_cp; \
+ *yy_cp = '\0'; \
+ yy_c_buf_p = yy_cp;
+
+#define YY_NUM_RULES 20
+#define YY_END_OF_BUFFER 21
+static yyconst short int yy_accept[37] =
+ { 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 21, 6,
+ 1, 3, 5, 7, 4, 8, 9, 10, 13, 14,
+ 15, 16, 17, 19, 6, 2, 8, 9, 10, 11,
+ 12, 10, 13, 14, 16, 18
+ } ;
+
+static yyconst int yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 1, 1, 1, 1, 1, 1, 4, 5,
+ 6, 7, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 8, 1, 9, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *yytext;
+#line 1 "pascal_count.l"
+#define INITIAL 0
+#line 2 "pascal_count.l"
+
+/*
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+*/
+
+#include "driver.h"
+
+#define YY_NO_UNPUT
+
+/* 1 if we saw a non-comment, non-whitespace char on this line */
+int saw_char = 0;
+
+#define comment 1
+
+#define bcomment 2
+
+#define string 3
+
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap YY_PROTO(( void ));
+#else
+extern int yywrap YY_PROTO(( void ));
+#endif
+#endif
+
+#ifndef YY_NO_UNPUT
+static void yyunput YY_PROTO(( int c, char *buf_ptr ));
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen YY_PROTO(( yyconst char * ));
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput YY_PROTO(( void ));
+#else
+static int input YY_PROTO(( void ));
+#endif
+#endif
+
+#if YY_STACK_USED
+static int yy_start_stack_ptr = 0;
+static int yy_start_stack_depth = 0;
+static int *yy_start_stack = 0;
+#ifndef YY_NO_PUSH_STATE
+static void yy_push_state YY_PROTO(( int new_state ));
+#endif
+#ifndef YY_NO_POP_STATE
+static void yy_pop_state YY_PROTO(( void ));
+#endif
+#ifndef YY_NO_TOP_STATE
+static int yy_top_state YY_PROTO(( void ));
+#endif
+
+#else
+#define YY_NO_PUSH_STATE 1
+#define YY_NO_POP_STATE 1
+#define YY_NO_TOP_STATE 1
+#endif
+
+#ifdef YY_MALLOC_DECL
+YY_MALLOC_DECL
+#else
+#if __STDC__
+#ifndef __cplusplus
+#include <stdlib.h>
+#endif
+#else
+/* Just try to get by without declaring the routines. This will fail
+ * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
+ * or sizeof(void*) != sizeof(int).
+ */
+#endif
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO (void) fwrite( yytext, yyleng, 1, yyout )
+#endif
+
+/* Gets input and stuffs it into "buf". The number of characters read, or
+ * YY_NULL, is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( yy_current_buffer->yy_is_interactive ) \
+ { \
+ int c = '*', n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \
+ && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" );
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL int yylex YY_PROTO(( void ))
+#endif
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+YY_DECL
+ {
+ register yy_state_type yy_current_state;
+ register char *yy_cp = NULL, *yy_bp = NULL;
+ register int yy_act;
+
+#line 43 "pascal_count.l"
+
+ line_number = 1;
+ saw_char = 0;
+ BEGIN(INITIAL);
+
+
+ if ( yy_init )
+ {
+ yy_init = 0;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! yy_start )
+ yy_start = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( ! yy_current_buffer )
+ yy_current_buffer =
+ yy_create_buffer( yyin, YY_BUF_SIZE );
+
+ yy_load_buffer_state();
+ }
+
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = yy_c_buf_p;
+
+ /* Support of yytext. */
+ *yy_cp = yy_hold_char;
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yy_start;
+yy_match:
+ while ( (yy_current_state = yy_nxt[yy_current_state][yy_ec[YY_SC_TO_UI(*yy_cp)]]) > 0 )
+ ++yy_cp;
+
+ yy_current_state = -yy_current_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+
+do_action: /* This label is used only to access EOF actions. */
+
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+case 1:
+YY_RULE_SETUP
+#line 48 "pascal_count.l"
+/* Do nothing */
+ YY_BREAK
+case 2:
+YY_RULE_SETUP
+#line 49 "pascal_count.l"
+{BEGIN(comment);}
+ YY_BREAK
+case 3:
+YY_RULE_SETUP
+#line 50 "pascal_count.l"
+{if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+ YY_BREAK
+case 4:
+YY_RULE_SETUP
+#line 51 "pascal_count.l"
+{BEGIN(bcomment);}
+ YY_BREAK
+case 5:
+YY_RULE_SETUP
+#line 52 "pascal_count.l"
+{saw_char = 1; BEGIN(string);}
+ YY_BREAK
+case 6:
+YY_RULE_SETUP
+#line 53 "pascal_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 54 "pascal_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 56 "pascal_count.l"
+/* Do nothing */
+ YY_BREAK
+case 9:
+YY_RULE_SETUP
+#line 57 "pascal_count.l"
+{if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+ YY_BREAK
+case 10:
+YY_RULE_SETUP
+#line 58 "pascal_count.l"
+/* Do nothing */
+ YY_BREAK
+case 11:
+YY_RULE_SETUP
+#line 59 "pascal_count.l"
+{if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+ YY_BREAK
+case 12:
+YY_RULE_SETUP
+#line 60 "pascal_count.l"
+{BEGIN(INITIAL);}
+ YY_BREAK
+case 13:
+YY_RULE_SETUP
+#line 62 "pascal_count.l"
+/* Do nothing */
+ YY_BREAK
+case 14:
+YY_RULE_SETUP
+#line 63 "pascal_count.l"
+{if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+ YY_BREAK
+case 15:
+YY_RULE_SETUP
+#line 64 "pascal_count.l"
+{BEGIN(INITIAL);}
+ YY_BREAK
+case 16:
+YY_RULE_SETUP
+#line 66 "pascal_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 17:
+YY_RULE_SETUP
+#line 67 "pascal_count.l"
+{
+ fprintf(stderr, "Warning: newline in string - file %s, line %ld\n",
+ filename, line_number);
+ if (saw_char) {sloc++; saw_char=0;};
+ BEGIN(INITIAL); /* Switch back; this at least limits damage */
+ line_number++;
+ }
+ YY_BREAK
+case 18:
+YY_RULE_SETUP
+#line 74 "pascal_count.l"
+{saw_char = 1;}
+ YY_BREAK
+case 19:
+YY_RULE_SETUP
+#line 75 "pascal_count.l"
+{saw_char = 1; BEGIN(INITIAL);}
+ YY_BREAK
+case 20:
+YY_RULE_SETUP
+#line 77 "pascal_count.l"
+ECHO;
+ YY_BREAK
+ case YY_STATE_EOF(INITIAL):
+ case YY_STATE_EOF(comment):
+ case YY_STATE_EOF(bcomment):
+ case YY_STATE_EOF(string):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = yy_hold_char;
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between yy_current_buffer and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ yy_n_chars = yy_current_buffer->yy_n_chars;
+ yy_current_buffer->yy_input_file = yyin;
+ yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state();
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++yy_c_buf_p;
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = yy_c_buf_p;
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer() )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ yy_did_buffer_switch_on_eof = 0;
+
+ if ( yywrap() )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yy_c_buf_p =
+ yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state();
+
+ yy_cp = yy_c_buf_p;
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ yy_c_buf_p =
+ &yy_current_buffer->yy_ch_buf[yy_n_chars];
+
+ yy_current_state = yy_get_previous_state();
+
+ yy_cp = yy_c_buf_p;
+ yy_bp = yytext_ptr + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of yylex */
+
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH -
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+
+static int yy_get_next_buffer()
+ {
+ register char *dest = yy_current_buffer->yy_ch_buf;
+ register char *source = yytext_ptr;
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( yy_current_buffer->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ yy_current_buffer->yy_n_chars = yy_n_chars = 0;
+
+ else
+ {
+ int num_to_read =
+ yy_current_buffer->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+#ifdef YY_USES_REJECT
+ YY_FATAL_ERROR(
+"input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
+#else
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = yy_current_buffer;
+
+ int yy_c_buf_p_offset =
+ (int) (yy_c_buf_p - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ int new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+				/* Include room for 2 EOB chars. */
+ yy_flex_realloc( (void *) b->yy_ch_buf,
+ b->yy_buf_size + 2 );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = 0;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = yy_current_buffer->yy_buf_size -
+ number_to_move - 1;
+#endif
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
+ yy_n_chars, num_to_read );
+
+ yy_current_buffer->yy_n_chars = yy_n_chars;
+ }
+
+ if ( yy_n_chars == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart( yyin );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ yy_current_buffer->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ yy_n_chars += number_to_move;
+ yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
+ yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+
+ yytext_ptr = &yy_current_buffer->yy_ch_buf[0];
+
+ return ret_val;
+ }
+
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+static yy_state_type yy_get_previous_state()
+ {
+ register yy_state_type yy_current_state;
+ register char *yy_cp;
+
+ yy_current_state = yy_start;
+
+ for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
+ {
+ yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)];
+ }
+
+ return yy_current_state;
+ }
+
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ */
+
+#ifdef YY_USE_PROTOS
+static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state )
+#else
+static yy_state_type yy_try_NUL_trans( yy_current_state )
+yy_state_type yy_current_state;
+#endif
+ {
+ register int yy_is_jam;
+
+ yy_current_state = yy_nxt[yy_current_state][1];
+ yy_is_jam = (yy_current_state <= 0);
+
+ return yy_is_jam ? 0 : yy_current_state;
+ }
+
+
+#ifndef YY_NO_UNPUT
+#ifdef YY_USE_PROTOS
+static void yyunput( int c, register char *yy_bp )
+#else
+static void yyunput( c, yy_bp )
+int c;
+register char *yy_bp;
+#endif
+ {
+ register char *yy_cp = yy_c_buf_p;
+
+ /* undo effects of setting up yytext */
+ *yy_cp = yy_hold_char;
+
+ if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
+ { /* need to shift things up to make room */
+ /* +2 for EOB chars. */
+ register int number_to_move = yy_n_chars + 2;
+ register char *dest = &yy_current_buffer->yy_ch_buf[
+ yy_current_buffer->yy_buf_size + 2];
+ register char *source =
+ &yy_current_buffer->yy_ch_buf[number_to_move];
+
+ while ( source > yy_current_buffer->yy_ch_buf )
+ *--dest = *--source;
+
+ yy_cp += (int) (dest - source);
+ yy_bp += (int) (dest - source);
+ yy_current_buffer->yy_n_chars =
+ yy_n_chars = yy_current_buffer->yy_buf_size;
+
+ if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
+ YY_FATAL_ERROR( "flex scanner push-back overflow" );
+ }
+
+ *--yy_cp = (char) c;
+
+
+ yytext_ptr = yy_bp;
+ yy_hold_char = *yy_cp;
+ yy_c_buf_p = yy_cp;
+ }
+#endif /* ifndef YY_NO_UNPUT */
+
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput()
+#else
+static int input()
+#endif
+ {
+ int c;
+
+ *yy_c_buf_p = yy_hold_char;
+
+ if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+ /* This was really a NUL. */
+ *yy_c_buf_p = '\0';
+
+ else
+ { /* need more input */
+ int offset = yy_c_buf_p - yytext_ptr;
+ ++yy_c_buf_p;
+
+ switch ( yy_get_next_buffer() )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ yyrestart( yyin );
+
+ /* fall through */
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap() )
+ return EOF;
+
+ if ( ! yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput();
+#else
+ return input();
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yy_c_buf_p = yytext_ptr + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */
+ *yy_c_buf_p = '\0'; /* preserve yytext */
+ yy_hold_char = *++yy_c_buf_p;
+
+
+ return c;
+ }
+#endif /* YY_NO_INPUT */
+
+#ifdef YY_USE_PROTOS
+void yyrestart( FILE *input_file )
+#else
+void yyrestart( input_file )
+FILE *input_file;
+#endif
+ {
+ if ( ! yy_current_buffer )
+ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
+
+ yy_init_buffer( yy_current_buffer, input_file );
+ yy_load_buffer_state();
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
+#else
+void yy_switch_to_buffer( new_buffer )
+YY_BUFFER_STATE new_buffer;
+#endif
+ {
+ if ( yy_current_buffer == new_buffer )
+ return;
+
+ if ( yy_current_buffer )
+ {
+ /* Flush out information for old buffer. */
+ *yy_c_buf_p = yy_hold_char;
+ yy_current_buffer->yy_buf_pos = yy_c_buf_p;
+ yy_current_buffer->yy_n_chars = yy_n_chars;
+ }
+
+ yy_current_buffer = new_buffer;
+ yy_load_buffer_state();
+
+ /* We don't actually know whether we did this switch during
+ * EOF (yywrap()) processing, but the only time this flag
+ * is looked at is after yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ yy_did_buffer_switch_on_eof = 1;
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_load_buffer_state( void )
+#else
+void yy_load_buffer_state()
+#endif
+ {
+ yy_n_chars = yy_current_buffer->yy_n_chars;
+ yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
+ yyin = yy_current_buffer->yy_input_file;
+ yy_hold_char = *yy_c_buf_p;
+ }
+
+
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_create_buffer( FILE *file, int size )
+#else
+YY_BUFFER_STATE yy_create_buffer( file, size )
+FILE *file;
+int size;
+#endif
+ {
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
+
+ yy_init_buffer( b, file );
+
+ return b;
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_delete_buffer( YY_BUFFER_STATE b )
+#else
+void yy_delete_buffer( b )
+YY_BUFFER_STATE b;
+#endif
+ {
+ if ( ! b )
+ return;
+
+ if ( b == yy_current_buffer )
+ yy_current_buffer = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ yy_flex_free( (void *) b->yy_ch_buf );
+
+ yy_flex_free( (void *) b );
+ }
+
+
+
+#ifdef YY_USE_PROTOS
+void yy_init_buffer( YY_BUFFER_STATE b, FILE *file )
+#else
+void yy_init_buffer( b, file )
+YY_BUFFER_STATE b;
+FILE *file;
+#endif
+
+
+ {
+ yy_flush_buffer( b );
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+#if YY_ALWAYS_INTERACTIVE
+ b->yy_is_interactive = 1;
+#else
+#if YY_NEVER_INTERACTIVE
+ b->yy_is_interactive = 0;
+#else
+ b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
+#endif
+#endif
+ }
+
+
+#ifdef YY_USE_PROTOS
+void yy_flush_buffer( YY_BUFFER_STATE b )
+#else
+void yy_flush_buffer( b )
+YY_BUFFER_STATE b;
+#endif
+
+ {
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == yy_current_buffer )
+ yy_load_buffer_state();
+ }
+
+
+#ifndef YY_NO_SCAN_BUFFER
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size )
+#else
+YY_BUFFER_STATE yy_scan_buffer( base, size )
+char *base;
+yy_size_t size;
+#endif
+ {
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return 0;
+
+ b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+ b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = 0;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ yy_switch_to_buffer( b );
+
+ return b;
+ }
+#endif
+
+
+#ifndef YY_NO_SCAN_STRING
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str )
+#else
+YY_BUFFER_STATE yy_scan_string( yy_str )
+yyconst char *yy_str;
+#endif
+ {
+ int len;
+ for ( len = 0; yy_str[len]; ++len )
+ ;
+
+ return yy_scan_bytes( yy_str, len );
+ }
+#endif
+
+
+#ifndef YY_NO_SCAN_BYTES
+#ifdef YY_USE_PROTOS
+YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len )
+#else
+YY_BUFFER_STATE yy_scan_bytes( bytes, len )
+yyconst char *bytes;
+int len;
+#endif
+ {
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ int i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = len + 2;
+ buf = (char *) yy_flex_alloc( n );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+ for ( i = 0; i < len; ++i )
+ buf[i] = bytes[i];
+
+ buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = yy_scan_buffer( buf, n );
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+ }
+#endif
+
+
+#ifndef YY_NO_PUSH_STATE
+#ifdef YY_USE_PROTOS
+static void yy_push_state( int new_state )
+#else
+static void yy_push_state( new_state )
+int new_state;
+#endif
+ {
+ if ( yy_start_stack_ptr >= yy_start_stack_depth )
+ {
+ yy_size_t new_size;
+
+ yy_start_stack_depth += YY_START_STACK_INCR;
+ new_size = yy_start_stack_depth * sizeof( int );
+
+ if ( ! yy_start_stack )
+ yy_start_stack = (int *) yy_flex_alloc( new_size );
+
+ else
+ yy_start_stack = (int *) yy_flex_realloc(
+ (void *) yy_start_stack, new_size );
+
+ if ( ! yy_start_stack )
+ YY_FATAL_ERROR(
+ "out of memory expanding start-condition stack" );
+ }
+
+ yy_start_stack[yy_start_stack_ptr++] = YY_START;
+
+ BEGIN(new_state);
+ }
+#endif
+
+
+#ifndef YY_NO_POP_STATE
+static void yy_pop_state()
+ {
+ if ( --yy_start_stack_ptr < 0 )
+ YY_FATAL_ERROR( "start-condition stack underflow" );
+
+ BEGIN(yy_start_stack[yy_start_stack_ptr]);
+ }
+#endif
+
+
+#ifndef YY_NO_TOP_STATE
+static int yy_top_state()
+ {
+ return yy_start_stack[yy_start_stack_ptr - 1];
+ }
+#endif
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+#ifdef YY_USE_PROTOS
+static void yy_fatal_error( yyconst char msg[] )
+#else
+static void yy_fatal_error( msg )
+char msg[];
+#endif
+ {
+ (void) fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+ }
+
+
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ yytext[yyleng] = yy_hold_char; \
+ yy_c_buf_p = yytext + n; \
+ yy_hold_char = *yy_c_buf_p; \
+ *yy_c_buf_p = '\0'; \
+ yyleng = n; \
+ } \
+ while ( 0 )
+
+
+/* Internal utility routines. */
+
+#ifndef yytext_ptr
+#ifdef YY_USE_PROTOS
+static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
+#else
+static void yy_flex_strncpy( s1, s2, n )
+char *s1;
+yyconst char *s2;
+int n;
+#endif
+ {
+ register int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
+ }
+#endif
+
+#ifdef YY_NEED_STRLEN
+#ifdef YY_USE_PROTOS
+static int yy_flex_strlen( yyconst char *s )
+#else
+static int yy_flex_strlen( s )
+yyconst char *s;
+#endif
+ {
+ register int n;
+ for ( n = 0; s[n]; ++n )
+ ;
+
+ return n;
+ }
+#endif
+
+
+#ifdef YY_USE_PROTOS
+static void *yy_flex_alloc( yy_size_t size )
+#else
+static void *yy_flex_alloc( size )
+yy_size_t size;
+#endif
+ {
+ return (void *) malloc( size );
+ }
+
+#ifdef YY_USE_PROTOS
+static void *yy_flex_realloc( void *ptr, yy_size_t size )
+#else
+static void *yy_flex_realloc( ptr, size )
+void *ptr;
+yy_size_t size;
+#endif
+ {
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return (void *) realloc( (char *) ptr, size );
+ }
+
+#ifdef YY_USE_PROTOS
+static void yy_flex_free( void *ptr )
+#else
+static void yy_flex_free( ptr )
+void *ptr;
+#endif
+ {
+ free( ptr );
+ }
+
+#if YY_MAIN
+int main()
+ {
+ yylex();
+ return 0;
+ }
+#endif
+#line 77 "pascal_count.l"
+
+
+#include "driver.c"
+
+
diff --git a/pascal_count.l b/pascal_count.l
new file mode 100644
index 0000000..cedfbb4
--- /dev/null
+++ b/pascal_count.l
@@ -0,0 +1,81 @@
+%{
+
+/*
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+*/
+
+#include "driver.h"
+
+#define YY_NO_UNPUT
+
+/* 1 if we saw a non-comment, non-whitespace char on this line */
+int saw_char = 0;
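+
+/* Illustrative example (not part of the original source): with the rules
+ * below, a Pascal line such as
+ *     x := 1;  { set x }
+ * counts as one SLOC because code appears before the comment opens, while a
+ * line containing only { a comment } or (* a comment *) or whitespace is not
+ * counted, since saw_char is never set for it.
+ */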
+
+%}
+
+%option noyywrap
+
+SPACE [ \t\n\r\f]
+
+%x comment
+%x bcomment
+%x string
+
+%%
+ line_number = 1;
+ saw_char = 0;
+ BEGIN(INITIAL);
+
+[ \t\r\f] /* Do nothing */
+"(*" {BEGIN(comment);}
+\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+"{" {BEGIN(bcomment);}
+"'" {saw_char = 1; BEGIN(string);}
+[^ \t\r\f(\n{'][^(\n{']* {saw_char = 1;}
+. {saw_char = 1;}
+
+<comment>[^*\n]+ /* Do nothing */
+<comment>[^*\n]*\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+<comment>"*"+[^*)\n]* /* Do nothing */
+<comment>"*"+[^*)\n]*\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+<comment>"*"+")" {BEGIN(INITIAL);}
+
+<bcomment>[^}\n]+ /* Do nothing */
+<bcomment>[^}\n]*\n {if (saw_char) {sloc++; saw_char=0;}; line_number++;}
+<bcomment>"}" {BEGIN(INITIAL);}
+
+<string>[^'\n]+ {saw_char = 1;}
+<string>\n {
+ fprintf(stderr, "Warning: newline in string - file %s, line %ld\n",
+ filename, line_number);
+ if (saw_char) {sloc++; saw_char=0;};
+ BEGIN(INITIAL); /* Switch back; this at least limits damage */
+ line_number++;
+ }
+<string>'' {saw_char = 1;}
+<string>' {saw_char = 1; BEGIN(INITIAL);}
+
+%%
+
+#include "driver.c"
+
+
diff --git a/perl_count b/perl_count
new file mode 100755
index 0000000..472ec33
--- /dev/null
+++ b/perl_count
@@ -0,0 +1,147 @@
+#!/usr/bin/perl
+# perl_count - count physical lines of code in Perl programs.
+# Usage: perl_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+# Physical lines of Perl are MUCH HARDER to count than you'd think.
+# Comments begin with "#".
+# Also, anything in a "perlpod" is a comment.
+# See perlpod(1) for more info; a perlpod starts with
+# \s*=command, can have more commands, and ends with \s*=cut.
+# Note that = followed by space is NOT a perlpod.
+# Although we ignore everything after __END__ in a file,
+# we will count everything after __DATA__; there's arguments for counting
+# and for not counting __DATA__.
+
+# What's worse, "here" documents must be COUNTED AS CODE, even if
+# they're FORMATTED AS A PERLPOD. Surely no one would do this, right?
+# Sigh... it can happen. See perl5.005_03/pod/splitpod.
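+#
+# Illustrative example (not from the original script): all three of the
+# following lines would be counted as code, even though the middle one is
+# formatted like a POD command, because it sits inside a here document:
+#   print <<"EOD";
+#   =head1 This looks like POD but is here-doc data
+#   EOD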
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+ my $isinpod = 0;
+ my $heredocument = ""; # If nonempty, identifies the ending marker.
+
+ open (FILE, $file);
+ while (<FILE>) {
+    s/#.*//;             # Delete comments (from "#" to end of line).
+ if ($heredocument and m/^\s*$heredocument/) {
+ $heredocument = ""; # finished here doc.
+ } elsif (m/<<\s*["']?([A-Za-z0-9_-]+)["']?[;,]\s*$/) {
+ # Beginning of a here document.
+ $heredocument = $1;
+ } elsif (!$heredocument && m/^\s*=cut/) { # Ending a POD?
+ if (! $isinpod) {
+ print stderr "cut without pod start in file $file line $.\n";
+ }
+ s/.*//; # Don't count the cut command.
+ $isinpod = 0;
+    } elsif (!$heredocument && m/^\s*=[a-zA-Z]/) {   # Starting or continuing a POD?
+ # Perlpods can have multiple contents, so it's okay if $isinpod == 1.
+ # Note that =(space) isn't a POD; library file perl5db.pl does this!
+ $isinpod = 1;
+ } elsif (m/^__END__/) { # Stop processing this file on __END__.
+ last;
+ }
+ if ((! $isinpod) && (m/\S/)) { $sloc++;}
+ }
+ # Show per-file & total; reset $isinpod per file.
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ $sloc = 0;
+ if ($isinpod) {
+ print stderr "pod without closing cut in file $file\n";
+ }
+ # Reset state:
+ $isinpod = 0;
+ $heredocument = "";
+ close (FILE);
+}
+
+# The following is POD documentation; it should not be counted:
+=head1 Test
+=head2 testing
+=cut
+
+__END__
+# The following should not be counted in a line-counting program:
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+print "Hello!\n";
+
+
diff --git a/php_count.c b/php_count.c
new file mode 100644
index 0000000..ee7ce10
--- /dev/null
+++ b/php_count.c
@@ -0,0 +1,335 @@
+/* php_count: given a list of C/C++/Java files on the command line,
+ count the SLOC in each one. SLOC = physical, non-comment lines.
+ This program knows about C++ and C comments (and how they interact),
+ and correctly ignores comment markers inside strings.
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+
+ Usage: Use in one of the following ways:
+ php_count # As filter
+ php_count list_of_files # Counts for each file.
+ php_count -f fl # Counts the files listed in "fl".
+
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+
+/* If ALLOW_SHORT_TAGS is true, then <? all by itself begins PHP code. */
+#define ALLOW_SHORT_TAGS 1
+
+/* If ALLOW_ASP_TAGS is true, then <% begins PHP code. */
+#define ALLOW_ASP_TAGS 1
+
+
+
+/* Modes: PHP starts in "NONE", and <?php etc change mode to "NORMAL". */
+enum mode_t { NONE, NORMAL, INSTRING, INCOMMENT, INSINGLESTRING, HEREDOC };
+
+enum comment_t {ANSIC_STYLE, CPP_STYLE, SH_STYLE}; /* Types of comments */
+enum end_t {NORMAL_END, SCRIPT_END, ASP_END}; /* Type of ending to expect. */
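+
+/* Illustrative walk-through (not part of the original source): for the input
+     <html>              -- mode NONE, HTML text is not counted
+     <?php $x = 1;       -- "<?php" switches to NORMAL; code chars count the line
+     // just a comment   -- INCOMMENT until the newline, nothing counted
+     ?>                  -- back to NONE
+   sloc_count() reports 1 SLOC for this fragment. */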
+
+
+/* Globals */
+long total_sloc;
+
+long line_number;
+
+/* Handle input */
+
+/* Number of characters in one line, maximum. */
+/* The code uses fgets() so that longer lines are truncated & not a
+ buffer overflow hazard. */
+#define LONGEST_LINE 20000
+
+static char current_line[LONGEST_LINE];
+static char *clocation; /* points into current_line */
+static long sloc = 0;
+static int sawchar = 0; /* Did you see a character on this line? */
+static int beginning_of_line = 0;
+static int is_input_eof;
+
+void read_input_line(FILE *stream) {
+ /* Read in a new line - increment sloc if sawchar, & reset sawchar. */
+ if (feof(stream)) {
+ is_input_eof = 1;
+ return;
+ }
+ line_number++;
+ fgets(current_line, sizeof(current_line)-2, stream);
+ clocation = &(current_line[0]);
+ beginning_of_line = 1;
+ if (current_line[0] == '\0') is_input_eof = 1;
+ if (sawchar) {
+ /* printf("DEBUG: INCREMENTING SLOC\n"); */
+ sawchar = 0;
+ sloc++;
+ }
+}
+
+void init_input(FILE *stream) {
+ current_line[0] = '\0';
+ is_input_eof = 0;
+ sawchar = 0;
+ read_input_line(stream);
+}
+
+void consume_char(FILE *stream) {
+  /* Advance one character; read a new input line when the current one is used up. */
+ beginning_of_line = 0;
+ if (!*clocation) read_input_line(stream);
+ else clocation++;
+}
+
+int match_consume(const char *m, FILE *stream) {
+  /* If the input matches m: consume it (moving forward) and return TRUE. */
+ if (!*clocation) read_input_line(stream);
+ if (strncasecmp(m, clocation, strlen(m)) == 0) {
+ /* printf("MATCH: %s, %s\n", m, clocation); */
+ clocation += strlen(m);
+ beginning_of_line = 0;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+int current_char(FILE *stream) {
+ if (!*clocation) read_input_line(stream);
+ return *clocation;
+}
+
+char *rest_of_line(FILE *stream) {
+ /* returns rest of the line in a malloc'ed entry (caller must free()),
+ consuming it. */
+ char *result;
+
+ result = strdup(clocation);
+ read_input_line(stream);
+ return result;
+}
+
+
+void strstrip(char *s) {
+ /* Strip whitespace off the end of s. */
+ char *p;
+
+ /* Remove whitespace from the end by walking backwards. */
+ for (p= s + strlen(s) - 1; p >= s && isspace(*p); p--) {
+ *p = '\0';
+ }
+ return;
+}
+
+
+long sloc_count(char *filename, FILE *stream) {
+ /* Count the sloc in the program in stdin. */
+  /* Count the SLOC in the program read from the given stream. */
+ enum mode_t mode = NONE; /* State machine state - NORMAL == PHP code */
+ enum comment_t comment_type; /* ANSIC_STYLE, CPP_STYLE, SH_STYLE */
+ enum end_t expected_end; /* The kind of ending expected, e.g. ?> */
+
+ char *heredoc_end;
+
+ sloc = 0;
+
+
+ /* The following implements a state machine with transitions; the
+ main state is "mode"; the transitions are triggered by character input. */
+
+ while (!is_input_eof) {
+ /* printf("mode=%d, current_char=%c\n", mode, current_char()); */
+ if (mode == NONE) {
+ /* Note: PHP will raise errors if something starts with
+ <?php and isn't followed by whitespace, e.g., <?phphello
+ is illegal. We won't look for this case, under the assumption
+ that someone won't bother to count malformed code. It's just
+         as well, anyway - few would think of doing it!
+ Note that simple <? followed by arbitrary characters is okay,
+ and is handled by the <? processing, so <?echo("hello")?> works. */
+ if (match_consume("<?php", stream)) {
+ expected_end = NORMAL_END;
+ mode = NORMAL;
+ } else if (ALLOW_SHORT_TAGS && match_consume("<?", stream)) {
+ expected_end = NORMAL_END;
+ mode = NORMAL;
+ /* FIXME: <script...> should be more flexible, allowing for
+ other attributes etc. I haven't seen this as a real problem. */
+ } else if (match_consume("<script language=\"php\">", stream)) {
+ expected_end = SCRIPT_END;
+ mode = NORMAL;
+ } else if (ALLOW_ASP_TAGS && match_consume("<%", stream)) {
+ expected_end = ASP_END;
+ mode = NORMAL;
+ } else consume_char(stream);
+ } else if (mode == NORMAL) {
+ if ((expected_end==NORMAL_END) && match_consume("?>", stream)) {
+ mode = NONE;
+ } else if ((expected_end==ASP_END) && match_consume("%>", stream)) {
+ mode = NONE;
+ } else if ((expected_end==SCRIPT_END) && match_consume("</script>", stream)) {
+ mode = NONE;
+ } else if (match_consume("\"", stream)) {
+ sawchar = 1;
+ mode = INSTRING;
+ } else if (match_consume("\'", stream)) {
+ sawchar = 1;
+ mode = INSINGLESTRING;
+ } else if (match_consume("/*", stream)) {
+ mode = INCOMMENT;
+ comment_type = ANSIC_STYLE;
+ } else if (match_consume("//", stream)) {
+ mode = INCOMMENT;
+ comment_type = CPP_STYLE;
+ } else if (match_consume("#", stream)) {
+ mode = INCOMMENT;
+ comment_type = SH_STYLE;
+ } else if (match_consume("<<<", stream)) {
+ mode = HEREDOC;
+ while (isspace(current_char(stream)) && !is_input_eof) {consume_char(stream);}
+ heredoc_end = rest_of_line(stream);
+ strstrip(heredoc_end);
+ } else {
+ if (!isspace(current_char(stream))) sawchar = 1;
+ consume_char(stream);
+ }
+ } else if (mode == INSTRING) {
+ /* We only count string lines with non-whitespace -- this is to
+ gracefully handle syntactically invalid programs.
+ You could argue that multiline strings with whitespace are
+ still executable and should be counted. */
+ if (!isspace(current_char(stream))) sawchar = 1;
+ if (match_consume("\"", stream)) {mode = NORMAL;}
+ else if (match_consume("\\\"", stream) || match_consume("\\\\", stream) ||
+ match_consume("\\\'", stream)) {}
+ else consume_char(stream);
+ } else if (mode == INSINGLESTRING) {
+ /* We only count string lines with non-whitespace; see above. */
+ if (!isspace(current_char(stream))) sawchar = 1;
+ if (current_char(stream) == '\'') {}
+ if (match_consume("'", stream)) {mode = NORMAL; }
+ else if (match_consume("\\\\", stream) || match_consume("\\\'", stream)) { }
+ else { consume_char(stream); }
+ } else if (mode == INCOMMENT) {
+ if ((comment_type == ANSIC_STYLE) && match_consume("*/", stream)) {
+ mode = NORMAL; }
+ /* Note: in PHP, must accept ending markers, even in a comment: */
+ else if ((expected_end==NORMAL_END) && match_consume("?>", stream))
+ { mode = NONE; }
+ else if ((expected_end==ASP_END) && match_consume("%>", stream)) { mode = NONE; }
+ else if ((expected_end==SCRIPT_END) && match_consume("</script>", stream))
+ { mode = NONE; }
+ else if ( ((comment_type == CPP_STYLE) || (comment_type == SH_STYLE)) &&
+ match_consume("\n", stream)) { mode = NORMAL; }
+ else consume_char(stream);
+ } else if (mode == HEREDOC) {
+ if (!isspace(current_char(stream))) sawchar = 1;
+ if (beginning_of_line && match_consume(heredoc_end, stream)) {
+ mode=NORMAL;
+ } else {
+ consume_char(stream);
+ }
+ } else {
+ fprintf(stderr, "Warning! Unknown mode in PHP file %s, mode=%d\n",
+ filename, mode);
+ consume_char(stream);
+ }
+ }
+ if (mode != NONE) {
+ fprintf(stderr, "Warning! Unclosed PHP file %s, mode=%d\n", filename, mode);
+ }
+
+ return sloc;
+}
+
+
+void count_file(char *filename) {
+ long sloc;
+ FILE *stream;
+
+ stream = fopen(filename, "r");
+ line_number = 0;
+ init_input(stream);
+ sloc = sloc_count(filename, stream);
+ fclose (stream);
+ total_sloc += sloc;
+ printf("%ld %s\n", sloc, filename);
+}
+
+char *read_a_line(FILE *file) {
+ /* Read a line in, and return a malloc'ed buffer with the line contents.
+ Any newline at the end is stripped.
+ If there's nothing left to read, returns NULL. */
+
+ /* We'll create a monstrously long buffer to make life easy for us: */
+ char buffer[10000];
+ char *returnval;
+ char *newlinepos;
+
+ returnval = fgets(buffer, sizeof(buffer), file);
+ if (returnval) {
+ newlinepos = buffer + strlen(buffer) - 1;
+ if (*newlinepos == '\n') {*newlinepos = '\0';};
+ return strdup(buffer);
+ } else {
+ return NULL;
+ }
+}
+
+
+int main(int argc, char *argv[]) {
+ long sloc;
+ int i;
+ FILE *file_list;
+ char *s;
+
+ total_sloc = 0;
+ line_number = 0;
+
+ if (argc <= 1) {
+ init_input(stdin);
+ sloc = sloc_count("-", stdin);
+ printf("%ld %s\n", sloc, "-");
+ total_sloc += sloc;
+ } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) {
+ if (!strcmp (argv[2], "-")) {
+ file_list = stdin;
+ } else {
+ file_list = fopen(argv[2], "r");
+ }
+ if (file_list) {
+ while ((s = read_a_line(file_list))) {
+ count_file(s);
+ free(s);
+ }
+ }
+ } else {
+ for (i=1; i < argc; i++) { count_file(argv[i]); }
+ }
+ printf("Total:\n");
+ printf("%ld\n", total_sloc);
+ exit(0);
+}
+
diff --git a/print_sum b/print_sum
new file mode 100755
index 0000000..f0ef453
--- /dev/null
+++ b/print_sum
@@ -0,0 +1,40 @@
+#!/usr/bin/perl
+# print_sum - read from stdin and print the sum.
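+#
+# Illustrative usage (not from the original script): the leading number on
+# each input line is summed, so
+#   printf '3 foo.c\n4 bar.c\n' | print_sum
+# would print 7.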
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total = 0.0;
+
+while (<>) {
+ if (m/^\s*([\d.]+)/) {
+ $maybe_number = $1;
+ $maybe_number =~ s/\.*$//; # chop trailing ".".
+ if ($maybe_number =~ m/\d/) {
+ $total += $maybe_number;
+ }
+ }
+}
+
+print "$total\n";
+
diff --git a/print_sum_subset b/print_sum_subset
new file mode 100755
index 0000000..2db2496
--- /dev/null
+++ b/print_sum_subset
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+# print the sum of SLOC for a subset of a package.
+# The subset is specified using a regular expression.
+
+# To use, "cd" into the package data directory (with the "_outfile.dat" files),
+# then specify as the first parameter the pattern defining the subset.
+
+# E.G.:
+# cd ../data/linux
+# print_sum_subset 'BUILD\/linux\/drivers\/'
+#
+# will show the SLOC total for the "drivers" directory & subdirs
+# of the "linux" data subdirectory.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+echo -n "$1 "
+grep -h "$1" *_outfile.dat | grep '^[1-9][0-9]* [^ ]' | print_sum
+
diff --git a/python_count b/python_count
new file mode 100755
index 0000000..ab8c99e
--- /dev/null
+++ b/python_count
@@ -0,0 +1,120 @@
+#!/usr/bin/perl -w
+# python_count - count physical lines of code in Python programs.
+# Usage: python_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+# This is a trivial/naive program.
+
+# Comments begin with "#".
+# Python supports multi-line strings using """, which matches another """.
+# When not inside a multi-line string, a line whose first non-whitespace
+# is """ almost always indicates a programming comment;
+# this is also true for lines beginning with '"'.
+# This means that a string which is part of an expression but which begins
+# a new line won't be counted; this problem is rare in practice.
+# This code DOES count _data_ inside a triple-quote (that's not a comment).
+# Note that this isn't true for single-quote, which is used in case
+# statements (etc.) but not in this context.
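+#
+# Illustrative example (not from the original script): given the input
+#   """module docstring
+#   spanning two lines"""
+#   x = """data
+#   also counted"""
+# the first two lines are treated as a comment (the triple-quote opens at the
+# start of a line), while the last two lines count as code: 2 SLOC in total.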
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+ my $isintriple = 0; # A triple-quote is in effect.
+ my $isincomment = 0; # We are in a multiline (triple-quoted) comment.
+
+ open (FILE, $file);
+ while (<FILE>) {
+ if (! $isintriple) { # Normal case:
+ s/""".*"""//; # Delete triple-quotes that begin & end on the line.
+ s/^\s*"([^"]|(\\"))+"//; # Delete lonely strings starting on BOL.
+ s/#.*//; # Delete "#" comments.
+ if (m/"""/) { # Does a multiline triple-quote begin here?
+ $isintriple = 1;
+ if (m/^\s*"""/) {$isincomment = 1;} # It's a comment if at BOL.
+ }
+ } else { # we ARE in a triple.
+ if (m/"""/) {
+ if ($isincomment) {
+ s/.*?"""//; # Delete string text if it's a comment (not if data)
+ } else {
+ s/.*?"""/x/; # Leave something there to count.
+ }
+ # But wait! Another triple might start on this line!
+ # (see Python-1.5.2/Tools/freeze/makefreeze.py for an example)
+ if (m/"""/) {
+ # It did! No change in state!
+ } else {
+ $isintriple = 0;
+ $isincomment = 0;
+ }
+ }
+ }
+ # TO DEBUG:
+ # print "cmmnt=${isincomment} trp=${isintriple}: $_\n";
+ if ( (!$isincomment) && m/\S/) {$sloc++;};
+ }
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ $sloc = 0;
+ if ($isintriple) {
+ print STDERR "No closing triple-doublequote-marks in file $file\n";
+ }
+ # Reset rest of state:
+ $isintriple = 0;
+ $isincomment = 0;
+ close (FILE); # Reset $. (line count) each time.
+}
diff --git a/redo_licenses b/redo_licenses
new file mode 100755
index 0000000..8580b38
--- /dev/null
+++ b/redo_licenses
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+# redo_licenses -- recompute licenses.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+SPECS=/usr/src/redhat/SPECS
+BUILD=/usr/src/redhat/BUILD
+
+cd $BUILD
+for builddir in *
+do
+ specfile=`cat ${builddir}/ORIGINAL_SPEC_FILE | head -1`
+ specfile=${SPECS}/$specfile
+ echo "builddir=${builddir}, specfile=${specfile}"
+ /root/extract_license "$builddir" "$specfile" > ${builddir}/PROGRAM_LICENSE
+ license=`cat ${builddir}/PROGRAM_LICENSE | head -1`
+ echo " $license"
+
+done
+
diff --git a/rpm_unpacker b/rpm_unpacker
new file mode 100755
index 0000000..1312066
--- /dev/null
+++ b/rpm_unpacker
@@ -0,0 +1,71 @@
+#!/bin/sh
+
+# rpm_unpacker -- unpacks RPMs into the BUILD directory.
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+SPECS=/usr/src/redhat/SPECS
+BUILD=/usr/src/redhat/BUILD
+
+BUILD_LIST=/root/build_list
+OLD_BUILD_LIST=${BUILD_LIST}.old
+
+echo "lost+found" > $OLD_BUILD_LIST
+
+cd $SPECS
+for specfile in *.spec
+do
+ cd $SPECS
+ # The "yes" is to give "yes" to "do you want to run this patch" requests -
+ # in particular this is needed to unpack samba.2.0.7 in Red Hat 7.1.
+ if yes | rpm -bp $specfile
+ then
+ # Things were fine, do nothing.
+ echo "UNPACKER: Successfully performed rpm -bp $specfile"
+ else
+ echo "UNPACKER WARNING - ERROR in rpm -bp $specfile"
+ fi
+
+ # Find the new BUILD entries, and create cross-references to the old.
+ cd $BUILD
+ ls | sort > $BUILD_LIST
+ CHANGES=`comm -13 $OLD_BUILD_LIST $BUILD_LIST`
+ anychange="0"
+ for newbuild in $CHANGES
+ do
+ anychange=1
+ echo $specfile > ${newbuild}/ORIGINAL_SPEC_FILE
+ echo "UNPACKER: added build $newbuild from $specfile"
+ extract_license "$newbuild" "${SPECS}/$specfile" > ${newbuild}/PROGRAM_LICENSE
+ # For disk space, erase all HTML files.
+ # If disk space is available, REMOVE THIS LINE:
+ # find "$newbuild" -type f -name "*.html" -exec rm {} \;
+ done
+  if [ "$anychange" = "0" ]
+ then
+ echo "UNPACKER: did not add a build directory for spec file $specfile"
+ fi
+ mv $BUILD_LIST $OLD_BUILD_LIST
+
+done
+
diff --git a/ruby_count b/ruby_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/ruby_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' $@
+
diff --git a/sed_count b/sed_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/sed_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' $@
+
diff --git a/sh_count b/sh_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/sh_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' $@
+
diff --git a/show_filecount b/show_filecount
new file mode 100755
index 0000000..95f9707
--- /dev/null
+++ b/show_filecount
@@ -0,0 +1,58 @@
+#!/bin/sh
+# given a list of data subdirs, show how many files of each type
+# are in each subdir.
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+echo "Dir C C++ Python Asm sh csh Java Lisp Tcl Expect Perl ML Modula3 awk sed make not auto unknown"
+
+for dir in $@
+do
+ if [ -d "$dir" ]
+ then
+ echo $dir | tr -d '\n '; echo -n " "
+ wc -l < $dir/ansic_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/cpp_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/python_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/asm_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/sh_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/csh_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/java_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/lisp_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/tcl_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/exp_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/perl_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/ml_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/modula3_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/awk_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/sed_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/makefile_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/not_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/auto_list.dat | tr -d '\n '; echo -n " "
+ wc -l < $dir/unknown_list.dat | tr -d '\n '; echo -n " "
+ echo
+ fi
+done
+
+
diff --git a/simplecount b/simplecount
new file mode 100755
index 0000000..4c9b125
--- /dev/null
+++ b/simplecount
@@ -0,0 +1,84 @@
+#!/usr/bin/perl -w
+
+# simplecount
+# Usage: simplecount commentstart [-f file] [list_of_files]
+# commentstart: string that begins a comment (continuing til end-of-line)
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+
+# prints the number of nonblank lines after stripping comments
+# (comments begin with comment-char and continue to end-of-line).
+# This is naive, and can be fooled by comment chars in strings, but
+# that's not a significant problem.
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$commentchar = shift;
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+ open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+
+ open (FILE, $file);
+ while(<FILE>) {
+    s/${commentchar}.*//;   # Strip away any comments.
+    if (m/\S/) {            # Nonwhitespace in the line; count it!
+      $sloc++;
+    }
+ }
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+ close (FILE);
+}
diff --git a/sloccount b/sloccount
new file mode 100755
index 0000000..9491227
--- /dev/null
+++ b/sloccount
@@ -0,0 +1,258 @@
+#!/bin/sh
+
+# This is the front-end program "sloccount", part of the
+# SLOCCount tool suite by David A. Wheeler.
+# Given a list of directories, compute the SLOC count,
+# automatically creating the directory $HOME/.slocdata.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+# See the SLOCCount documentation if you need
+# more details about the license.
+
+version=2.26
+
+if [ "$#" -eq 0 ]
+then
+ echo "Error: You must provide a directory or directories of source code."
+ exit 1
+fi
+
+startingdir=`pwd`
+
+
+# "datadir" is some suitable safe place for the data; here's the default:
+datadir=${HOME}/.slocdata
+
+details="n"
+cached="n"
+append="n" # If "append", then don't delete datadir, just add to it.
+oneprogram="--oneprogram"
+display_results="n"
+duplicate_control=""
+autogen=""
+filecount=""
+wide=""
+effort_model=""
+personcost=""
+overhead=""
+follow=""
+addlang=""
+notopfollow=""
+showother=""
+
+# Perl 5.8.0 handles the "LANG" environment variable oddly;
+# if it includes ".UTF-8" (which it does in Red Hat Linux 9 and others)
+# then it will bitterly complain about ordinary text.
+# So, we'll need to filter ".UTF-8" out of LANG.
+if [ x"$LANG" != x ]
+then
+ LANG=`echo "$LANG" | sed -e 's/\.UTF-8//'`
+ export LANG
+ # echo "New LANG variable: $LANG"
+fi
+
+while [ "$#" -gt 0 ]
+do
+ case "$1"
+ in
+ --version) echo "$version"
+ exit 0;;
+ --cached) cached="y"
+ shift;;
+ --append) append="y"
+ shift;;
+ --follow) follow="--follow"
+ shift;;
+ --notopfollow) notopfollow="--notopfollow"
+ shift;;
+ --datadir) shift
+ if [ ! -d "$1" ]
+ then
+ echo "Error: $1 is not a directory"
+ exit 1
+ fi
+ cd "$1"
+ datadir=`pwd`
+ cd $startingdir
+ shift;;
+ --duplicates) duplicate_control="$1"
+ shift;;
+ --crossdups) duplicate_control="$1"
+ shift;;
+ --autogen) autogen="$1"
+ shift;;
+ --multiproject) oneprogram=""
+ shift;;
+ --filecount) filecount="$1"
+ shift;;
+ --filecounts) filecount="$1"
+ shift;;
+ --wide) wide="$1"
+ shift;;
+ --details) details="y"
+ shift;;
+ --addlang) addlang="$addlang $1 $2"
+ shift; shift;;
+ --addlangall) addlang="--addlangall"
+ shift;;
+ --showother) showother="--showother"
+ shift;;
+ --effort) effort_model="$1 $2 $3"
+ shift; shift; shift;;
+ --schedule) schedule_model="$1 $2 $3"
+ shift; shift; shift;;
+ --personcost) personcost="$1 $2"
+ shift; shift;;
+ --overhead) overhead="$1 $2"
+ shift; shift;;
+ --) break;;
+ --*) echo "Error: no such option $1"
+ exit 1;;
+ *) break;;
+ esac
+done
+
+# --duplicates) duplicate_control="$1"
+# --autogen) autogen="$1"
+# $follow
+
+case "$cached"
+in
+ y)
+ if [ -n "$duplicate_control" -o -n "$autogen" -o -n "$follow" ]
+ then
+ echo "Warning: You cannot control what files are selected in a cache."
+ echo "The option '--cached' disables --duplicates, --crossdups,"
+ echo "--autogen, and --follow. Remove the --cached option if you"
+ echo "are changing what you wish to include in your calculations."
+ echo
+ fi
+ if [ -d "$datadir" ]
+ then
+ display_results="y"
+ else
+ echo "Sorry, data directory $datadir does not exist."
+ exit 1
+ fi;;
+ n) # Not cached -- need to compute the results.
+
+ if [ "$append" = "n" ]; then
+ if [ -r "${datadir}/sloc_noerase" ]; then
+ echo "ERROR! This data directory is marked as do-not-erase."
+ echo "Remove the file ${datadir}/sloc_noerase to erase it."
+ exit 1
+ fi
+ if [ "$#" -eq 0 ]; then
+ echo "ERROR! No directories for initial analysis supplied."
+ exit 1
+ fi
+ rm -fr "$datadir"
+ mkdir "$datadir"
+ fi
+
+ # Now that "datadir" exists, first test to make sure wc -l works.
+ wctestfile=${datadir}/.wctestfile
+ echo "" > $wctestfile
+ echo "line two" >> $wctestfile
+ echo "line three" >> $wctestfile
+ echo "line four" >> $wctestfile
+ testcount=`wc -l < ${wctestfile} | sed -e 's/ //g'`
+ if [ "$testcount" -ne 4 ]
+ then
+ echo "FAILURE! Your wc program's -l option produces wrong results."
+ echo "Update your wc (probably in a textutils package) to a correct version."
+ exit 1
+ fi
+
+
+ # Split things up if we're given only one directory on the argument line
+ # and that directory has more than one subdirectory:
+ split_things_up="n"
+ if [ "$#" -eq 1 ]
+ then
+ count=0
+ for x in $1/*
+ do
+ if [ -d "$x" ]
+ then
+ count=`expr $count + 1`
+ if [ $count -gt 1 ]
+ then
+ split_things_up="y"
+ break
+ fi
+ fi
+ done
+ fi
+ # If we're appending, don't split things up.
+ if [ "$append" = "y" ]
+ then
+ split_things_up="n"
+ fi
+
+ case $split_things_up
+ in
+ y) make_filelists $follow $notopfollow --datadir "$datadir" --skip src "$1"/* ||
+ exit 1
+ if [ -d "$1"/src ]
+ then
+ make_filelists $notopfollow --datadir "$datadir" --prefix "src_" "$1"/src/* ||
+ exit 1
+ fi
+ ;;
+ *) make_filelists $follow $notopfollow --datadir "$datadir" "$@" || exit 1
+ ;;
+ esac
+
+ cd $datadir
+ if echo "Categorizing files." &&
+ break_filelist --duplistfile sloc_hashes $duplicate_control $autogen * &&
+ echo "Computing results." &&
+ compute_all *
+ then
+ display_results=y
+ fi
+ echo
+ echo
+ ;;
+esac
+
+# If we're appending, don't display results.
+if [ "$append" = "y" ]
+then
+ display_results="n"
+ echo "To display results, use the --cached option."
+fi
+
+
+case $display_results
+in
+ y)
+ cd $datadir
+ case $details
+ in
+ y) get_sloc_details * ;;
+ *) get_sloc $addlang $showother $filecount $oneprogram $effort_model $schedule_model $personcost $overhead * ;;
+ esac;;
+esac
+
diff --git a/sloccount.1 b/sloccount.1
new file mode 100644
index 0000000..8a5820c
--- /dev/null
+++ b/sloccount.1
@@ -0,0 +1,235 @@
+'\"
+.\" (C) Copyright 2001-2004 David A. Wheeler (dwheeler at dwheeler.com)
+.\"
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+.\"
+.\" David A. Wheeler's website is http://www.dwheeler.com
+.\"
+.\" Created Mon Jan 08 23:00:00 2001, David A. Wheeler (dwheeler at dwheeler.com)
+.\"
+.TH SLOCCOUNT 1 "31 July 2004" "SLOCCount" "SLOCCount"
+.SH NAME
+sloccount \- count source lines of code (SLOC)
+.SH SYNOPSIS
+.B sloccount
+.RB [ --version ]
+.RB [ --cached ]
+.RB [ --append ]
+[ \fB--datadir\fR \fIdirectory\fR ]
+.RB [ --follow ]
+.RB [ --duplicates ]
+.RB [ --crossdups ]
+.RB [ --autogen ]
+.RB [ --multiproject ]
+.RB [ --filecount ]
+.RB [ --wide ]
+.RB [ --details ]
+[ \fB--effort\fR \fIF\fR \fIE\fR ]
+[ \fB--schedule\fR \fIF\fR \fIE\fR ]
+[ \fB--personcost\fR \fIcost\fR ]
+[ \fB--overhead\fR \fIoverhead\fR ]
+[ \fB--addlang\fR \fIlanguage\fR ]
+[ \fB--addlangall\fR ]
+.RB [ -- ]
+.I directories
+.SH DESCRIPTION
+.PP
+sloccount counts the physical source lines of code (SLOC)
+contained in descendants of the specified set of directories.
+It automatically determines which files are source code files,
+and it automatically determines the computer language used in each file.
+By default it summarizes the SLOC results and presents various estimates
+(such as effort and cost to develop),
+but its output can be controlled by various options.
+.PP
+If you give sloccount a list of two or more directories, the counts
+will be broken up according to that list.
+There is one important limitation:
+the basenames of the directories given as parameters must be different,
+because those names are used to group summary information.
+Thus, you can't run "sloccount /usr/src/hello /usr/local/src/hello".
+This limitation only applies to parameters of sloccount - subdirectories
+descended from the top directories can have the same basename.
+.PP
+If you give sloccount only a single directory, sloccount tries
+to automatically find a reasonable breakdown for purposes of reporting
+(so it'll produce a useful report).
+In this case, if the directory has at least
+two subdirectories, then those subdirectories will be used as the
+breakdown.
+If the single directory contains files as well as directories
+(or if you give sloccount some files as parameters), those files will
+be assigned to the directory "top_dir" so you can tell them apart
+from other contents.
+Finally, if there's a subdirectory named "src", then that subdirectory is
+again broken down, with all the further subdirectories prefixed with "src_".
+Thus, if directory "X" has a subdirectory "src", which contains subdirectory
+"modules", sloccount will report a separate count for "src_modules".
+.PP
+sloccount normally considers all descendants of these directories,
+though unless told otherwise it ignores symbolic links.
+.PP
+sloccount is the usual front-end of the package of tools named "SLOCCount".
+Note that the name of the entire package has capital letters, while
+the name of this front-end program does not.
+.PP
+sloccount will normally report estimates of schedule time, effort, and
+cost, and for single projects it also estimates
+the average number of active developers.
+These are merely estimates, not etched in stone; you can modify the
+parameters used to improve the estimates.
+
+
+.SH OPTIONS
+.TP 12
+.BI --version
+Report the version number of SLOCCount and immediately exit.
+This option can't be usefully combined with any other option.
+
+.TP 12
+.BI --cached
+Do not recalculate; instead, use cached results from a previous execution.
+Without the --cached or --append option,
+sloccount automatically removes the data directory
+and recreates it.
+
+.TP 12
+.BI --append
+Do not remove previous calculations from the data directory;
+instead, add the analysis to the current contents of the data directory.
+
+.TP
+.BI --datadir " directory"
+Store or use cached data in the given data directory; default value
+is "~/.slocdata".
+
+.TP
+.BI --follow
+Follow symbolic links.
+
+.TP
+.BI --duplicates
+Count all duplicates.
+Normally, if files have equal content (as determined using
+MD5 hash values), only one is counted.
+
+.TP
+.BI --crossdups
+Count duplicates if they occur in different portions of the breakdown.
+Thus, if the top directory contains many different projects, and you
+want the duplicates in different projects to count in each project,
+choose this option.
+
+.TP
+.BI --autogen
+Count source code files that appear to be automatically generated.
+Normally these are excluded.
+
+.TP
+.BI --multiproject
+The different directories represent different projects;
+otherwise, it's assumed that all of the source code belongs
+to a single project.
+This doesn't change the total number of files or SLOC values, but
+it does affect the effort and schedule estimates.
+Given this option,
+effort is computed separately for each project (and then summed),
+and the schedule is the estimated schedule of the largest project.
+
+.TP
+.BI --filecount
+Display counts of files instead of SLOC.
+
+.TP
+.BI --wide
+Display in the "wide" (tab-separated) format.
+
+.TP
+.BI --details
+Display details, that is, results for every source code file.
+
+.TP
+.BI --effort " F E"
+Change the factor and exponent for the effort model.
+Effort (in person-months) is computed as F*(SLOC/1000)^E.
+
+.TP
+.BI --schedule " F E"
+Change the factor and exponent for the schedule model.
+Schedule (in months) is computed as F*(effort)^E.
+
+.TP
+.BI --personcost " cost"
+Change the average annual salary to
+.IR cost .
+
+.TP
+.BI --overhead " overhead"
+Change the overhead value to
+.IR overhead .
+Estimated cost is computed as effort * personcost * overhead.
+
+.TP
+.BI --addlang " language"
+Add a language that is normally excluded from the final report to the
+list of reported languages.
+Currently the only legal values for language are "makefile", "sql",
+and "html".
+These files
+are not normally included in the SLOC counts, although their SLOCs are
+internally calculated and they are shown in the file counts.
+If you want to include more than one such language, do it by
+passing --addlang more than once, e.g., --addlang makefile --addlang sql.
+
+.TP
+.BI --addlangall
+Add all languages not normally included in final reports.
+
+.SH "NOTES"
+As with many other programs using Unix-like options,
+directories whose names begin with a
+dash (``-'') can be misinterpreted as options.
+If the directories to be analyzed might begin with a dash, use the
+double-dash (``\-\-'') to indicate the end of the option list
+before listing the directories.
+
+.SH "BUGS"
+Filenames with embedded newlines (in the directories or their
+descendants) won't be handled correctly; they will be interpreted
+as separate filenames where the newlines are inserted.
+An attacker could prevent sloccount from working by
+creating filenames of the form /normal/directory ... NEWLINE/dev/zero.
+Such filenames are exceedingly rare in source code because they're a pain
+to work with using other tools, too.
+Future versions of sloccount may internally use NUL-separated filenames
+(like GNU find's -print0 option) to fix this.
+
+There are many more languages not yet handled by SLOCCount.
+
+SLOCCount only reports physical source lines of code.
+It would be
+very useful if it could also report logical lines of code, and perhaps
+other common metrics such as McCabe's complexity measures
+and complexity density (complexity/SLOC for each function or procedure).
+
+
+.SH "SEE ALSO"
+See the SLOCCount website at http://www.dwheeler.com/sloccount.
+Note that more detailed documentation is available both on the website
+and with the SLOCCount package.
+
+.SH AUTHOR
+David A. Wheeler (dwheeler@dwheeler.com).
+
diff --git a/sloccount.1.gz b/sloccount.1.gz
new file mode 100644
index 0000000..33d29e9
--- /dev/null
+++ b/sloccount.1.gz
Binary files differ
diff --git a/sloccount.html b/sloccount.html
new file mode 100644
index 0000000..233ae9a
--- /dev/null
+++ b/sloccount.html
@@ -0,0 +1,2464 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<title>SLOCCount User's Guide</title>
+</head>
+<body bgcolor="#FFFFFF">
+<center>
+<font size="+3"><b><span class="title">SLOCCount User's Guide</span></b></font>
+<br>
+<font size="+2"><span class="author">by David A. Wheeler (dwheeler, at, dwheeler.com)</span></font>
+<br>
+<font size="+2"><span class="pubdate">August 1, 2004</span></font>
+<br>
+<font size="+2"><span class="version">Version 2.26</span></font>
+</center>
+<p>
+<h1><a name="introduction">Introduction</a></h1>
+<p>
+SLOCCount (pronounced "sloc-count") is a suite of programs for counting
+physical source lines of code (SLOC) in potentially large software systems.
+Thus, SLOCCount is a "software metrics tool" or "software measurement tool".
+SLOCCount was developed by David A. Wheeler,
+originally to count SLOC in a GNU/Linux distribution, but it can be
+used for counting the SLOC of arbitrary software systems.
+<p>
+SLOCCount is known to work on Linux systems, and has been tested
+on Red Hat Linux versions 6.2, 7, and 7.1.
+SLOCCount should run on many other Unix-like systems (if Perl is installed),
+in particular, I would expect a *BSD system to work well.
+Windows users can run sloccount by first installing
+<a href="http://sources.redhat.com/cygwin">Cygwin</a>.
+SLOCCount is much slower on Windows/Cygwin, and it's not as easy to install
+or use on Windows, but it works.
+Of course, feel free to upgrade to an open source Unix-like system
+(such as Linux or *BSD) instead :-).
+<p>
+SLOCCount can count physical SLOC for a wide number of languages.
+Listed alphabetically, they are
+Ada, Assembly (for many machines and assemblers),
+awk (including gawk and nawk),
+Bourne shell (and relatives such as bash, ksh, zsh, and pdksh),
+C, C++, C# (also called C-sharp or cs), C shell (including tcsh),
+COBOL, Expect, Fortran (including Fortran 90), Haskell,
+Java, lex (including flex),
+LISP (including Scheme),
+makefiles (though they aren't usually shown in final reports),
+Modula3, Objective-C, Pascal, Perl, PHP, Python, Ruby, sed,
+SQL (normally not shown),
+TCL, and Yacc.
+It can gracefully handle awkward situations in many languages,
+for example, it can determine the
+syntax used in different assembly language files and adjust appropriately,
+it knows about Python's use of string constants as comments, and it
+can handle various Perl oddities (e.g., perlpods, here documents,
+and Perl's _&nbsp;_END_&nbsp;_ marker).
+It even has a "generic" SLOC counter that you may be able to use to count the
+SLOC of other languages (depending on the language's syntax).
+<p>
+SLOCCount can also take a large list of files and automatically categorize
+them using a number of different heuristics.
+The heuristics automatically determine if a file
+is a source code file or not, and if so, which language it's written in.
+For example,
+it knows that ".pc" is usually a C source file for an Oracle preprocessor,
+but it can detect many circumstances where it's actually a file about
+a "PC" (personal computer).
+For another example, it knows that ".m" is the standard extension for
+Objective-C, but it will check the file contents to
+see if it really is Objective-C.
+It will even examine file headers to attempt to accurately determine
+the file's true type.
+As a result, you can analyze large systems completely automatically.
+<p>
+Finally, SLOCCount has some report-generating tools
+to collect the data generated,
+and then present it in several different formats and sorted different ways.
+The report-generating tool can also generate simple tab-separated files
+so data can be passed on to other analysis tools (such as spreadsheets
+and database systems).
+<p>
+SLOCCount will try to quickly estimate development time and effort given only
+the lines of code it computes, using the original Basic COCOMO model.
+This estimate can be improved if you can give more information about the project.
+See the
+<a href="#cocomo">discussion below about COCOMO, including intermediate COCOMO</a>,
+if you want to improve the estimates by giving additional information about
+the project.
+<p>
+SLOCCount is open source software/free software (OSS/FS),
+released under the GNU General Public License (GPL), version 2;
+see the <a href="#license">license below</a>.
+The master web site for SLOCCount is
+<a href="http://www.dwheeler.com/sloccount">http://www.dwheeler.com/sloccount</a>.
+You can learn a lot about SLOCCount by reading the paper that caused its
+creation, available at
+<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>.
+Feel free to see my master web site at
+<a href="http://www.dwheeler.com">http://www.dwheeler.com</a>, which has
+other material such as the
+<a href="http://www.dwheeler.com/secure-programs"><i>Secure Programming
+for Linux and Unix HOWTO</i></a>,
+my <a href="http://www.dwheeler.com/oss_fs_refs.html">list of
+OSS/FS references</a>, and my paper
+<a href="http://www.dwheeler.com/oss_fs_why.html"><i>Why OSS/FS? Look at
+the Numbers!</i></a>
+Please send improvements by email
+to dwheeler, at, dwheeler.com (DO NOT SEND SPAM - please remove the
+commas, remove the spaces, and change the word "at" into the at symbol).
+<p>
+The following sections first give a "quick start"
+(discussing how to use SLOCCount once it's installed),
+discuss basic SLOCCount concepts,
+how to install it, how to set your PATH,
+how to install source code on RPM-based systems if you wish, and
+more information on how to use the "sloccount" front-end.
+This is followed by material for advanced users:
+how to use SLOCCount tools individually (for when you want more control
+than the "sloccount" tool gives you), designer's notes,
+the definition of SLOC, and miscellaneous notes.
+The last sections state the license used (GPL) and give
+hints on how to submit changes to SLOCCount (if you decide to make changes
+to the program).
+
+
+<p>
+<h1><a name="quick-start">Quick Start</a></h1>
+<p>
+Once you've installed SLOCCount (discussed below),
+you can measure an arbitrary program by typing everything
+after the dollar sign into a terminal session:
+<pre>
+ $ sloccount <i>topmost-source-code-directory</i>
+</pre>
+<p>
+The directory listed and all its descendants will be examined.
+You'll see output while it calculates,
+culminating with physical SLOC totals and
+estimates of development time, schedule, and cost.
+If the directory contains a set of directories, each of which is
+a different project developed independently,
+use the "--multiproject" option so the effort estimations
+can correctly take this into account.
+<p>
+You can redisplay the data different ways by using the "--cached"
+option, which skips the calculation stage and re-prints previously
+computed information.
+You can use other options to control what's displayed:
+"--filecount" shows counts of files instead of SLOC, and
+"--details" shows the detailed information about every source code file.
+So, to display all the details of every file once you've previously
+calculated the results, just type:
+<pre>
+ sloccount --cached --details
+</pre>
+<p>
+You'll notice that the default output ends with a request.
+If you use this data (e.g., in a report), please
+credit that data as being "generated using 'SLOCCount' by David A. Wheeler."
+I make no money from this program, so at least please give me some credit.
+<p>
+SLOCCount tries to ignore all automatically generated files, but its
+heuristics to detect this are necessarily imperfect (after all, even humans
+sometimes have trouble determining if a file was automatically generated).
+If possible, try to clean out automatically generated files from
+the source directories --
+in many situations "make clean" does this.
+<p>
+There's more to SLOCCount than this, but first we'll need to
+explain some basic concepts, then we'll discuss other options
+and advanced uses of SLOCCount.
+
+<p>
+<h1><a name="concepts">Basic Concepts</a></h1>
+<p>
+SLOCCount counts physical SLOC, also called "non-blank, non-comment lines".
+More formally, physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
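+For example, in the following small shell fragment only the four lines
+marked "counted" contribute to the physical SLOC total; the comment-only
+line and the blank line do not:
+<pre>
+  # a comment-only line (not counted)
+
+  echo "hello"                # counted
+  if [ -f somefile ]; then    # counted
+    echo "found it"           # counted
+  fi                          # counted
+</pre>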
+<p>
+In SLOCCount, there are 3 different directories:
+<ol>
+<li>The "source code directory", a directory containing the source code
+ being measured
+ (possibly in recursive subdirectories). The directories immediately
+ contained in the source code directory will normally be counted separately,
+ so it helps if your system is designed so that this top set of directories
+ roughly represents the system's major components.
+ If it doesn't, there are various tricks you can use to group source
+ code into components, but it's more work.
+ You don't need write access to the source code directory, but
+ you do need read access to all files, and read and search (execute) access
+ to all subdirectories.
+<li>The "bin directory", the directory containing the SLOCCount executables.
+ By default, installing the program creates a subdirectory
+ named "sloccount-VERSION" which is the bin directory.
+ The bin directory must be part of your PATH.
+<li>The "data directory", which stores the analysis results.
+ When measuring programs using "sloccount", by default
+ this is the directory ".slocdata" inside your home directory.
+ When you use the advanced SLOCCount tools directly,
+ in many cases this must be your "current" directory.
+ Inside the data directory are "data directory children" - these are
+ subdirectories that contain a file named "filelist", and each child
+ is used to represent a different project or a different
+ major component of a project.
+</ol>
+<p>
+SLOCCount can handle many different programming languages, and separate
+them by type (so you can compare the use of each).
+Here is the set of languages, sorted alphabetically;
+common filename extensions are in
+parentheses, with SLOCCount's ``standard name'' for the language
+listed in brackets:
+<ol>
+<li>Ada (.ada, .ads, .adb, .pad) [ada]
+<li>Assembly for many machines and assemblers (.s, .S, .asm) [asm]
+<li>awk (.awk) [awk]
+<li>Bourne shell and relatives such as bash, ksh, zsh, and pdksh (.sh) [sh]
+<li>C (.c, .pc, .ec, .ecp) [ansic]
+<li>C++ (.C, .cpp, .cxx, .cc, .pcc) [cpp]
+<li>C# (.cs) [cs]
+<li>C shell including tcsh (.csh) [csh]
+<li>COBOL (.cob, .cbl, .COB, .CBL) [cobol]
+<li>Expect (.exp) [exp]
+<li>Fortran 77 (.f, .f77, .F, .F77) [fortran]
+<li>Fortran 90 (.f90, .F90) [f90]
+<li>Haskell (.hs, .lhs) [haskell]; deals with both types of literate files.
+<li>Java (.java) [java]
+<li>lex (.l) [lex]
+<li>LISP including Scheme (.cl, .el, .scm, .lsp, .jl) [lisp]
+<li>makefiles (makefile) [makefile]
+<li>ML (.ml, .ml3) [ml]
+<li>Modula3 (.m3, .mg, .i3, .ig) [modula3]
+<li>Objective-C (.m) [objc]
+<li>Pascal (.p, .pas) [pascal]
+<li>Perl (.pl, .pm, .perl) [perl]
+<li>PHP (.php, .php[3456], .inc) [php]
+<li>Python (.py) [python]
+<li>Ruby (.rb) [ruby]
+<li>sed (.sed) [sed]
+<li>sql (.sql) [sql]
+<li>TCL (.tcl, .tk, .itk) [tcl]
+<li>Yacc (.y) [yacc]
+</ol>
+
+<p>
+<h1><a name="installing">Installing SLOCCount</a></h1>
+<p>
+Obviously, before using SLOCCount you'll need to install it.
+SLOCCount depends on other programs, in particular perl, bash,
+a C compiler (gcc will do), and md5sum
+(you can get a useful md5sum program in the ``textutils'' package
+on many Unix-like systems), so you'll need to get them installed
+if they aren't already.
+<p>
+If your system uses RPM version 4 or greater to install software
+(e.g., Red Hat Linux 7 or later), just download the SLOCCount RPM
+and install it using a normal installation command; from the text line
+you can use:
+<pre>
+ rpm -Uvh sloccount*.rpm
+</pre>
+<p>
+Everyone else will need to install from a tar file, and Windows users will
+have to install Cygwin before installing sloccount.
+<p>
+If you're using Windows, you'll need to first install
+<a href="http://sources.redhat.com/cygwin">Cygwin</a>.
+By installing Cygwin, you'll install an environment and a set of
+open source Unix-like tools.
+Cygwin essentially creates a Unix-like environment in which sloccount can run.
+You may be able to run parts of sloccount without Cygwin, in particular,
+the perl programs should run in the Windows port of Perl, but you're
+on your own - many of the sloccount components expect a Unix-like environment.
+If you want to install Cygwin, go to the
+<a href="http://sources.redhat.com/cygwin">Cygwin main page</a>
+and install it.
+If you're using Cygwin, <b>install it to use Unix newlines, not
+DOS newlines</b> - DOS newlines will cause odd errors in SLOCCount
+(and probably other programs, too).
+I have only tested a "full" Cygwin installation, so I suggest installing
+everything.
+If you're short on disk space, at least install
+binutils, bash, fileutils, findutils,
+gcc, grep, gzip, make, man, perl, readline,
+sed, sh-utils, tar, textutils, unzip, and zlib;
+you should probably install vim too,
+and there may be other dependencies as well.
+By default Cygwin will create a directory C:\cygwin\home\NAME,
+and will set up the ability to run Unix programs
+(which will think that the same directory is called /home/NAME).
+Now double-click on the Cygwin icon, or select from the Start menu
+the selection Programs / Cygnus Solutions / Cygwin Bash shell;
+you'll see a terminal screen with a Unix-like interface.
+Now follow the instructions (next) for tar file users.
+<p>
+If you're installing from the tar file, download the file
+(into your home directory is fine).
+Unpacking the file will create a subdirectory, so if you want the
+unpacked subdirectory to go somewhere special, "cd" to where you
+want it to go.
+Most likely, your home directory is just fine.
+Now gunzip and untar SLOCCount (the * replaces the version #) by typing
+this at a terminal session:
+<pre>
+ gunzip -c sloccount*.tar.gz | tar xvf -
+</pre>
+Replace "sloccount*.tar.gz" shown above
+with the full path of the downloaded file, wherever that is.
+You've now created the "bin directory", which is simply the
+"sloccount-VERSION" subdirectory created by the tar command
+(where VERSION is the version number).
+<p>
+Now you need to compile the few compiled programs in the "bin directory" so
+SLOCCount will be ready to go.
+First, cd into the newly-created bin directory, by typing:
+<pre>
+ cd sloccount*
+</pre>
+<p>
+You may then need to override some installation settings.
+You can do this by editing the supplied makefile, or alternatively,
+by providing options to "make" whenever you run make.
+The supplied makefile assumes your C compiler is named "gcc", which
+is true for most Linux systems, *BSD systems, and Windows systems using Cygwin.
+If this isn't true, you'll need to set
+the "CC" variable to the correct value (e.g., "cc").
+You can also modify where the files are stored; this variable is
+called PREFIX and its default is /usr/local
+(older versions of sloccount defaulted to /usr).
+<p>
+If you're using Windows and Cygwin, you
+<b>must</b> override one of the installation
+settings, EXE_SUFFIX, for installation to work correctly.
+One way to set this value is to edit the "makefile" file so that
+the line beginning with "EXE_SUFFIX" reads as follows:
+<pre>
+ EXE_SUFFIX=.exe
+</pre>
+If you're using Cygwin and you choose to modify the "makefile", you
+can use any text editor on the Cygwin side, or you can use a
+Windows text editor if it can read and write Unix-formatted text files.
+Cygwin users are free to use vim, for example.
+If you're installing into your home directory and using the default locations,
+Windows text editors will see the makefile as file
+C:\cygwin\home\NAME\sloccount-VERSION\makefile.
+Note that the Windows "Notepad" application doesn't work well, because it's not
+able to handle Unix text files correctly.
+Since this can be quite a pain, Cygwin users may instead decide to override
+the makefile values on the make command line during installation.
+<p>
+Finally, compile the few compiled programs in it by typing "make":
+<pre>
+ make
+</pre>
+If you didn't edit the makefile in the previous step, you
+need to provide options to the make invocation to set the correct values.
+This is done by simply saying (after "make") the name of the variable,
+an equal sign, and its correct value.
+Thus, to compile the program on a Windows system using Cygwin, you can
+skip modifying the makefile by typing this instead of just "make":
+<pre>
+ make EXE_SUFFIX=.exe
+</pre>
+<p>
+If you want, you can install sloccount for system-wide use without
+using the RPM version.
+Windows users using Cygwin should probably do this, particularly
+if they chose a "local" installation.
+To do this, first log in as root (Cygwin users don't need to do this
+for local installation).
+Edit the makefile to match your system's conventions, if necessary,
+and then type "make install":
+<pre>
+ make install
+</pre>
+If you need to set some make options, remember to do that here too.
+If you use "make install", you can uninstall it later using
+"make uninstall".
+Installing sloccount for system-wide use is optional;
+SLOCCount works without a system-wide installation.
+However, if you don't install sloccount system-wide, you'll need to
+set up your PATH variable; see the section on
+<a href="#path">setting your path</a>.
+<p>
+A note for Cygwin users (and some others): some systems, including Cygwin,
+don't set up the environment quite right and thus can't display the manual
+pages as installed.
+The problem is that they forget to search /usr/local/share/man for
+manual pages.
+If you want to read the installed manual pages, type this
+into a Bourne-like shell:
+<pre>
+ MANPATH=/usr/local/share/man:/usr/share/man:/usr/man
+ export MANPATH
+</pre>
+Or, if you use a C shell:
+<pre>
+ setenv MANPATH "/usr/local/share/man:/usr/share/man:/usr/man"
+</pre>
+From then on, you'll be able to view the reference manual pages
+by typing "man sloccount" (or by using whatever manual page display system
+you prefer).
+<p>
+
+<p>
+<h1><a name="installing-source">Installing The Source Code To Measure</a></h1>
+<p>
+Obviously, you must install the software source code you're counting,
+so somehow you must create the "source directory"
+with the source code to measure.
+You must also make sure that permissions are set so the software can
+read these directories and files.
+<p>
+For example, if you're trying to count the SLOC for an RPM-based Linux system,
+install the software source code by doing the following as root
+(which will place all source code into the source directory
+/usr/src/redhat/BUILD):
+<ol>
+<li>Install all source rpm's:
+<pre>
+ mount /mnt/cdrom
+ cd /mnt/cdrom/SRPMS
+ rpm -ivh *.src.rpm
+</pre>
+<li>Remove RPM spec files you don't want to count:
+<pre>
+ cd ../SPECS
+ (look in contents of spec files, removing what you don't want)
+</pre>
+<li>build/prep all spec files:
+<pre>
+ rpm -bp *.spec
+</pre>
+<li>Set permissions so the source files can be read by all:
+<pre>
+ chmod -R a+rX /usr/src/redhat/BUILD
+</pre>
+</ol>
+<p>
+Here's an example of how to download source code from an
+anonymous CVS server.
+Let's say you want to examine the source code in GNOME's "gnome-core"
+directory, as stored at the CVS server "anoncvs.gnome.org".
+Here's how you'd do that:
+<ol>
+<li>Set up site and login parameters:
+<pre>
+ export CVSROOT=':pserver:anonymous@anoncvs.gnome.org:/cvs/gnome'
+</pre>
+<li>Log in:
+<pre>
+ cvs login
+</pre>
+<li>Check out the software (copy it to your local directory), using
+mild compression to save on bandwidth:
+<pre>
+ cvs -z3 checkout gnome-core
+</pre>
+</ol>
+<p>
+Of course, if you have a non-anonymous account, you'd set CVSROOT
+to reflect this. For example, to log in using the "pserver"
+protocol as ACCOUNT_NAME, do:
+<pre>
+ export CVSROOT=':pserver:ACCOUNT_NAME@cvs.gnome.org:/cvs/gnome'
+</pre>
+<p>
+You may need root privileges to install the source code and to give
+another user permission to read it, but please avoid running the
+sloccount program as root.
+Although I know of no specific reason this would be a problem,
+running any program as root turns off helpful safeguards.
+<p>
+Although SLOCCount tries to detect (and ignore) many cases where
+programs are automatically generated, these heuristics are necessarily
+imperfect.
+So, please don't run any programs that generate other programs - just
+do enough to get the source code prepared for counting.
+In general you shouldn't run "make" on the source code, and if you have,
+consider running "make clean" or "make really_clean" on the source code first.
+It often doesn't make any difference, but identifying those circumstances
+is difficult.
+<p>
+SLOCCount will <b>not</b> automatically uncompress files that are
+compressed/archive files (such as .zip, .tar, or .tgz files).
+Often such files are just "left over" old versions or files
+that you're already counting.
+If you want to count the contents of compressed files, uncompress them first.
+<p>
+SLOCCount also doesn't delve into files using "literate programming"
+techniques, in part because there are too many incompatible formats
+that implement it.
+Thus, run the tools to extract the code from the literate programming files
+before running SLOCCount. Currently, the only exception to this rule is
+Haskell.
+
+
+<h1><a name="path">Setting your PATH</a></h1>
+Before you can run SLOCCount, you'll need to make sure
+the SLOCCount "bin directory" is in your PATH.
+If you've installed SLOCCount in a system-wide location
+such as /usr/bin, then you needn't do more; the RPMs and "make install"
+commands essentially do this.
+<p>
+Otherwise, in Bourne-shell variants, type:
+<pre>
+ PATH="$PATH:<i>the directory with SLOCCount's executable files</i>"
+ export PATH
+</pre>
+Csh users should instead type:
+<pre>
+ setenv PATH "$PATH:<i>the directory with SLOCCount's executable files</i>"
+</pre>
+
+<h1><a name="using-basics">Using SLOCCount: The Basics</a></h1>
+
+Normal use of SLOCCount is very simple.
+In a terminal window just type "sloccount", followed by a
+list of the source code directories to count.
+If you give it only a single directory, SLOCCount tries to be
+a little clever and break the source code into
+subdirectories for purposes of reporting:
+<ol>
+<li>If the directory has at least
+two subdirectories, then those subdirectories will be used as the
+breakdown (see the example below).
+<li>If the single directory contains files as well as directories
+(or if you give sloccount some files as parameters), those files will
+be assigned to the directory "top_dir" so you can tell them apart
+from other directories.
+<li>If there's a subdirectory named "src", then that subdirectory is again
+broken down, with all the further subdirectories prefixed with "src_".
+So if directory "X" has a subdirectory "src", which contains subdirectory
+"modules", the program will report a separate count from "src_modules".
+</ol>
+In the terminology discussed above, each of these directories would become
+"data directory children."
+<p>
+You can also give "sloccount" a list of directories, in which case the
+report will be broken down by these directories
+(make sure that the basenames of these directories differ).
+SLOCCount normally considers all descendants of these directories,
+though unless told otherwise it ignores symbolic links.
+<p>
+This is all easier to explain by example.
+Let's say that we want to measure Apache 1.3.12 as installed using an RPM.
+Once it's installed, we just type:
+<pre>
+ sloccount /usr/src/redhat/BUILD/apache_1.3.12
+</pre>
+The output we'll see shows status reports while it analyzes things,
+and then it prints out:
+
+<pre>
+SLOC Directory SLOC-by-Language (Sorted)
+24728 src_modules ansic=24728
+19067 src_main ansic=19067
+8011 src_lib ansic=8011
+5501 src_os ansic=5340,sh=106,cpp=55
+3886 src_support ansic=2046,perl=1712,sh=128
+3823 src_top_dir sh=3812,ansic=11
+3788 src_include ansic=3788
+3469 src_regex ansic=3407,sh=62
+2783 src_ap ansic=2783
+1378 src_helpers sh=1345,perl=23,ansic=10
+1304 top_dir sh=1304
+104 htdocs perl=104
+31 cgi-bin sh=24,perl=7
+0 icons (none)
+0 conf (none)
+0 logs (none)
+
+
+ansic: 69191 (88.85%)
+sh: 6781 (8.71%)
+perl: 1846 (2.37%)
+cpp: 55 (0.07%)
+
+
+Total Physical Source Lines of Code (SLOC) = 77873
+Estimated Development Effort in Person-Years (Person-Months) = 19.36 (232.36)
+ (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
+Estimated Schedule in Years (Months) = 1.65 (19.82)
+ (Basic COCOMO model, Months = 2.5 * (person-months**0.38))
+Estimated Average Number of Developers (Effort/Schedule) = 11.72
+Total Estimated Cost to Develop = $ 2615760
+ (average salary = $56286/year, overhead = 2.4).
+
+Please credit this data as "generated using 'SLOCCount' by David A. Wheeler."
+</pre>
+<p>
+Interpreting this should be straightforward.
+The Apache directory has several subdirectories, including "htdocs", "cgi-bin",
+and "src".
+The "src" directory has many subdirectories in it
+("modules", "main", and so on).
+Code files directly
+contained in the main directory /usr/src/redhat/BUILD/apache_1.3.12
+is labelled "top_dir", while
+code directly contained in the src subdirectory is labelled "src_top_dir".
+Code in the "src/modules" directory is labelled "src_modules" here.
+The output shows each major directory broken
+out, sorted from largest to smallest.
+Thus, the "src/modules" directory had the most code of the directories,
+24728 physical SLOC, all of it in C.
+The "src/helpers" directory had a mix of shell, perl, and C; note that
+when multiple languages are shown, the list of languages in that child
+is also sorted from largest to smallest.
+<p>
+Below the per-component set is a list of all languages used,
+with their total SLOC shown, sorted from most to least.
+After this is the total physical SLOC (77,873 physical SLOC in this case).
+<p>
+Next is an estimation of the effort and schedule (calendar time)
+it would take to develop this code.
+For effort, the units shown are person-years (with person-months
+shown in parentheses); for schedule, total years are shown first
+(with months in parentheses).
+When invoked through "sloccount", the default assumption is that all code is
+part of a single program; the "--multiproject" option changes this
+to assume that all top-level components are independently developed
+programs.
+When "--multiproject" is invoked, each project's efforts are estimated
+separately (and then summed), and the schedule estimate presented
+is the largest estimated schedule of any single component.
+<p>
+By default the "Basic COCOMO" model is used for estimating
+effort and schedule; this model
+includes design, code, test, and documentation time (both
+user/admin documentation and development documentation).
+<a href="#cocomo">See below for more information on COCOMO</a>
+as it's used in this program.
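+As a worked check against the Apache sample output above, using the Basic
+COCOMO formulas printed there:
+<pre>
+  Effort   = 2.4 * (77.873 ** 1.05) = about 232 person-months  (output: 232.36)
+  Schedule = 2.5 * (232.36 ** 0.38) = about 19.8 months        (output: 19.82)
+</pre>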
+<p>
+Next are several numbers that attempt to estimate what it would have cost
+to develop this program.
+This is simply the amount of effort, multiplied by the average annual
+salary and by the "overhead multiplier".
+The default annual salary is
+$56,286 per year; this value was from the
+<i>ComputerWorld</i>, September 4, 2000's Salary Survey
+of an average U.S. programmer/analyst salary in the year 2000.
+You might consider using other numbers
+(<i>ComputerWorld</i>'s September 3, 2001 Salary Survey found
+an average U.S. programmer/analyst salary making $55,100, senior
+systems programmers averaging $68,900, and senior systems analysts averaging
+$72,300).
+
+<p>
+Overhead is much harder to estimate; I did not find a definitive source
+for information on overheads.
+After informal discussions with several cost analysts,
+I determined that an overhead of 2.4
+would be representative of the overhead sustained by
+a typical software development company.
+As discussed in the next section, you can change these numbers too.
+
+<p>
+You may be surprised by the high cost estimates, but remember,
+these include design, coding, testing, documentation (both for users
+and for programmers), and a wrap rate for corporate overhead
+(to cover facilities, equipment, accounting, and so on).
+Many programmers forget these other costs and are shocked by the high figures.
+If you only wanted to know the costs of the coding, you'd need to get
+those figures.
+
+
+<p>
+Note that if any top-level directory has a file named PROGRAM_LICENSE,
+that file is assumed to contain the name of the license
+(e.g., "GPL", "LGPL", "MIT", "BSD", "MPL", and so on).
+If there is at least one such file, sloccount will also report statistics
+on licenses.
+
+<p>
+Note: sloccount internally uses MD5 hashes to detect duplicate files,
+and thus needs some program that can compute MD5 hashes.
+Normally it will use "md5sum" (available, for example, as a GNU utility).
+If that doesn't work, it will try to use "md5" and "openssl", and you may
+see error messages in this format:
+<pre>
+ Can't exec "md5sum": No such file or directory at
+ /usr/local/bin/break_filelist line 678, &lt;CODE_FILE&gt; line 15.
+ Can't exec "md5": No such file or directory at
+ /usr/local/bin/break_filelist line 678, &lt;CODE_FILE&gt; line 15.
+</pre>
+You can safely ignore these error messages; these simply show that
+SLOCCount is probing for a working program to compute MD5 hashes.
+For example, Mac OS X users normally don't have md5sum installed, but
+do have md5 installed, so they will probably see the first error
+message (because md5sum isn't available), followed by a note that a
+working MD5 program was found.
+
+
+<h1><a name="options">Options</a></h1>
+The program "sloccount" has a large number of options
+so you can control what is selected for counting and how the
+results are displayed.
+<p>
+There are several options that control which files are selected
+for counting:
+<pre>
+ --duplicates Count all duplicate files as normal files
+ --crossdups Count duplicate files if they're in different data directory
+ children.
+ --autogen Count automatically generated files
+ --follow Follow symbolic links (normally they're ignored)
+ --addlang Add languages to be counted that normally aren't shown.
+ --append Add more files to the data directory
+</pre>
+Normally, files which have exactly the same content are counted only once
+(data directory children are processed alphabetically, so the child that
+comes first in the alphabet is considered the owner of the master copy).
+If you want them all counted, use "--duplicates".
+Sometimes when you use sloccount, each directory represents a different
+project, in which case you might want to specify "--crossdups".
+The program tries to reject files that are automatically generated
+(e.g., a C file generated by bison), but you can disable this as well.
+You can use "--addlang" to show makefiles and SQL files, which aren't
+usually counted.
+<p>
+Possibly the most important option is "--cached".
+Normally, when sloccount runs, it computes a lot of information and
+stores this data in a "data directory" (by default, "~/.slocdata").
+The "--cached" option tells sloccount to use data previously computed,
+greatly speeding up use once you've done the computation once.
+The "--cached" option can't be used along with the options used to
+select what files should be counted.
+You can also select a different data directory by using the
+"--datadir" option.
+<p>
+There are many options for controlling the output:
+<pre>
+ --filecount Show counts of files instead of SLOC.
+ --details Present details: present one line per source code file.
+ --wide Show "wide" format. Ignored if "--details" selected
+ --multiproject Assume each directory is for a different project
+ (this modifies the effort estimation calculations)
+ --effort F E Change the effort estimation model, so that it uses
+ F as the factor and E as the exponent.
+ --schedule F E Change the schedule estimation model, so that it uses
+ F as the factor and E as the exponent.
+ --personcost P Change the average annual salary to P.
+ --overhead O Change the annual overhead to O.
+ -- End of options
+</pre>
+<p>
+Basically, the first time you use sloccount, if you're measuring
+a set of projects (not a single project) you might consider
+using "--crossdups" instead of the defaults.
+Then, you can redisplay data quickly by using "--cached",
+combining it with options such as "--filecount".
+If you want to send the data to another tool, use "--details".
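+<p>
+For example, a typical session for a set of projects might look like this
+(the source directory name is just a placeholder):
+<pre>
+ sloccount --crossdups /usr/src/projects      # first (full) analysis
+ sloccount --cached --filecount               # redisplay as file counts
+ sloccount --cached --details &gt; details.dat   # dump the data for other tools
+</pre>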
+<p>
+If you're measuring a set of projects, you probably ought to pass
+the option "--multiproject".
+When "--multiproject" is used, efforts are computed for each component
+separately and summed, and the time estimate used is the maximum
+single estimated time.
+<p>
+The "--details" option dumps the available data in 4 columns,
+tab-separated, where each line
+represents a source code file in the data directory children identified.
+The first column is the SLOC, the second column is the language type,
+the third column is the name of the data directory child
+(as it was given to get_sloc_details),
+and the last column is the absolute pathname of the source code file.
+You can then pipe this output to "sort" or some other tool for further
+analysis (such as a spreadsheet or RDBMS).
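+<p>
+For instance, here is one way (a sketch, not a built-in SLOCCount feature)
+to total the SLOC per language from the "--details" output using awk:
+<pre>
+ sloccount --cached --details | \
+   awk -F'\t' '{sloc[$2] += $1} END {for (lang in sloc) print lang, sloc[lang]}'
+</pre>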
+<p>
+You can change the parameters used to estimate effort using "--effort".
+For example, if you believe that in the environment being used
+you can produce 2 KSLOC/month scaling linearly, then
+that means that the factor for effort you should use is 1/2 = 0.5 month/KSLOC,
+and the exponent for effort is 1 (linear).
+Thus, you can use "--effort 0.5 1".
+<p>
+You can also set the annual salary and overheads used to compute
+estimated development cost.
+While "$" is shown, there's no reason you have to use dollars;
+the unit of development cost is the same unit as the unit used for
+"--personcost".
+
+<h1><a name="cocomo">More about COCOMO</a></h1>
+
+<p>
+By default SLOCCount uses a very simple estimating model for effort and schedule:
+the basic COCOMO model in the "organic" mode (modes are more fully discussed below).
+This model estimates effort and schedule, including design, code, test,
+and documentation time (both user/admin documentation and development documentation).
+Basic COCOMO is a nice simple model, and it's used as the default because
+it doesn't require any information about the code other than the SLOC count
+already computed.
+<p>
+However, basic COCOMO's accuracy is limited for the same reason -
+basic COCOMO doesn't take a number of important factors into account.
+You can at least quickly determine
+if the right "mode" is being used to improve accuracy. You can also
+use the "Intermediate COCOMO" and "Detailed COCOMO" models, which take more
+factors into account and are likely to produce more accurate estimates as
+a result. Take these estimates as just that - estimates - they're not grand truths.
+If you have the necessary information,
+you can improve the model's accuracy by taking these factors into account, and
+pass this additional information to sloccount using its
+"--effort" and "--schedule" options (as discussed in
+<a href="#options">options</a>).
+<p>
+To use the COCOMO model, you first need to determine your application's
+mode, which can be "organic", "semidetached", or "embedded".
+Most software is "organic" (which is why it's the default).
+Here are simple definitions of these modes:
+<ul>
+<li>Organic: Relatively small software teams develop software in a highly
+familiar, in-house environment. &nbsp;It has a generally stable development
+environment, minimal need for innovative algorithms, and requirements can
+be relaxed to avoid extensive rework.</li>
+<li>Semidetached: This is an intermediate
+step between organic and embedded. This is generally characterized by reduced
+flexibility in the requirements.</li>
+<li>Embedded: The project must operate
+within tight (hard-to-meet) constraints, and requirements
+and interface specifications are often non-negotiable.
+The software will be embedded in a complex environment that the
+software must deal with as-is.</li>
+</ul>
+By default, SLOCCount uses the basic COCOMO model in the organic mode.
+For the basic COCOMO model, here are the critical factors for --effort and --schedule:<br>
+<ul>
+<li>Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li>
+<li>Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li>
+<li>Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li>
+</ul>
+Thus, if you want to use SLOCCount but the project is actually semidetached,
+you can use the options "--effort 3.0 1.12 --schedule 2.5 0.35"
+to get a more accurate estimate.
+<br>
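+<p>
+These factors and exponents plug into the standard basic COCOMO equations:
+effort = factor * (KSLOC)^exponent (in person-months), and
+schedule = factor * (effort)^exponent (in months).
+As a rough, hand-checkable illustration (the 32 KSLOC project size is
+invented for this example, and "bc" must be installed):
+<pre>
+ # Organic-mode estimates for an invented 32 KSLOC project, using bc;
+ # l() is the natural logarithm and e() the exponential function.
+ echo "2.4 * e(1.05 * l(32))" | bc -l     # effort: roughly 91 person-months
+ echo "2.5 * e(0.38 * l(91.3))" | bc -l   # schedule: roughly 14 months
+</pre>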
+For more accurate estimates, you can use the intermediate COCOMO models.
+For intermediate COCOMO, use the following figures:<br>
+<ul>
+ <li>Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li>
+ <li>Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li>
+ <li>Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li>
+</ul>
+The intermediate COCOMO values for schedule are exactly the same as the basic
+COCOMO model; the starting effort values are not quite the same, as noted
+in Boehm's book. However, in the intermediate COCOMO model, you don't
+normally use the effort factors as-is; instead, you multiply them by various
+corrective factors (called cost drivers). To use these corrections, you consider
+all the cost drivers, determine which rating best describes your situation for each,
+and multiply their corrective values by the effort base factor.
+The result is the final effort factor.
+Here are the cost drivers (from Boehm's book, tables 8-2 and 8-3):
+
+<table cellpadding="2" cellspacing="2" border="1" width="100%">
+ <tbody>
+ <tr>
+ <th rowspan="1" colspan="2">Cost Drivers
+ </th>
+ <th rowspan="1" colspan="6">Ratings
+ </th>
+ </tr>
+ <tr>
+ <th>ID
+ </th>
+ <th>Driver Name
+ </th>
+ <th>Very Low
+ </th>
+ <th>Low
+ </th>
+ <th>Nominal
+ </th>
+ <th>High
+ </th>
+ <th>Very High
+ </th>
+ <th>Extra High
+ </th>
+ </tr>
+ <tr>
+ <td>RELY
+ </td>
+ <td>Required software reliability
+ </td>
+ <td>0.75 (effect is slight inconvenience)
+ </td>
+ <td>0.88 (easily recovered losses)
+ </td>
+ <td>1.00 (recoverable losses)
+ </td>
+ <td>1.15 (high financial loss)
+ </td>
+ <td>1.40 (risk to human life)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>DATA
+ </td>
+ <td>Database size
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>0.94 (database bytes/SLOC &lt; 10)
+ </td>
+ <td>1.00 (D/S between 10 and 100)
+ </td>
+ <td>1.08 (D/S between 100 and 1000)
+ </td>
+ <td>1.16 (D/S &gt; 1000)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>CPLX
+ </td>
+ <td>Product complexity
+ </td>
+ <td>0.70 (mostly straightline code, simple arrays, simple expressions)
+ </td>
+ <td>0.85
+ </td>
+ <td>1.00
+ </td>
+ <td>1.15
+ </td>
+ <td>1.30
+ </td>
+ <td>1.65 (microcode, multiple resource scheduling, device timing dependent coding)
+ </td>
+ </tr>
+ <tr>
+ <td>TIME
+ </td>
+ <td>Execution time constraint
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>1.00 (&lt;50% use of available execution time)
+ </td>
+ <td>1.11 (70% use)
+ </td>
+ <td>1.30 (85% use)
+ </td>
+ <td>1.66 (95% use)
+ </td>
+ </tr>
+ <tr>
+ <td>STOR
+ </td>
+ <td>Main storage constraint
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>1.00&nbsp;(&lt;50% use of available storage)</td>
+ <td>1.06 (70% use)
+ </td>
+ <td>1.21 (85% use)
+ </td>
+ <td>1.56 (95% use)
+ </td>
+ </tr>
+ <tr>
+ <td>VIRT
+ </td>
+ <td>Virtual machine (HW and OS) volatility
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>0.87 (major change every 12 months, minor every month)
+ </td>
+ <td>1.00 (major change every 6 months, minor every 2 weeks)</td>
+ <td>1.15 (major change every 2 months, minor changes every week)
+ </td>
+ <td>1.30 (major changes every 2 weeks, minor changes every 2 days)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>TURN
+ </td>
+ <td>Computer turnaround time
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>0.87 (interactive)
+ </td>
+ <td>1.00 (average turnaround &lt; 4 hours)
+ </td>
+ <td>1.07
+ </td>
+ <td>1.15
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>ACAP
+ </td>
+ <td>Analyst capability
+ </td>
+ <td>1.46 (15th percentile)
+ </td>
+ <td>1.19 (35th percentile)
+ </td>
+ <td>1.00 (55th percentile)
+ </td>
+ <td>0.86 (75th percentile)
+ </td>
+ <td>0.71 (90th percentile)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>AEXP
+ </td>
+ <td>Applications experience
+ </td>
+ <td>1.29 (&lt;= 4 months experience)
+ </td>
+ <td>1.13 (1 year)
+ </td>
+ <td>1.00 (3 years)
+ </td>
+ <td>0.91 (6 years)
+ </td>
+ <td>0.82 (12 years)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>PCAP
+ </td>
+ <td>Programmer capability
+ </td>
+ <td>1.42 (15th percentile)
+ </td>
+ <td>1.17 (35th percentile)
+ </td>
+ <td>1.00 (55th percentile)
+ </td>
+ <td>0.86 (75th percentile)
+ </td>
+ <td>0.70 (90th percentile)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>VEXP
+ </td>
+ <td>Virtual machine experience
+ </td>
+ <td>1.21 (&lt;= 1 month experience)
+ </td>
+ <td>1.10 (4 months)
+ </td>
+ <td>1.00 (1 year)
+ </td>
+ <td>0.90 (3 years)
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>LEXP
+ </td>
+ <td>Programming language experience
+ </td>
+ <td>1.14 (&lt;= 1 month experience)
+ </td>
+ <td>1.07 (4 months)
+ </td>
+ <td>1.00 (1 year)
+ </td>
+ <td>0.95 (3 years)
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>MODP
+ </td>
+ <td>Use of "modern" programming practices (e.g. structured programming)
+ </td>
+ <td>1.24 (No use)
+ </td>
+ <td>1.10
+ </td>
+ <td>1.00 (some use)
+ </td>
+ <td>0.91
+ </td>
+ <td>0.82 (routine use)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>TOOL
+ </td>
+ <td>Use of software tools
+ </td>
+ <td>1.24
+ </td>
+ <td>1.10
+ </td>
+ <td>1.00 (basic tools)
+ </td>
+ <td>0.91 (test tools)
+ </td>
+ <td>0.83 (requirements, design, management, documentation tools)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>SCED
+ </td>
+ <td>Required development schedule
+ </td>
+ <td>1.23 (75% of nominal)
+ </td>
+ <td>1.08 (85% of nominal)
+ </td>
+ <td>1.00 (nominal)
+ </td>
+ <td>1.04 (130% of nominal)
+ </td>
+ <td>1.10 (160% of nominal)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ </tbody>
+</table>
+<br>
+<br>
+<br>
+So, once all of the factors have been multiplied together, you can
+then use the "--effort" flag to set more accurate factors and exponents.
+Note that some factors will probably not be "nominal" simply because
+times have changed since COCOMO was originally developed, so ratings
+that were once unusual (and desirable) are now much more common.
+For example,
+for many software projects of today, virtual machine volatility tends to
+be low, and the
+use of "modern" programming practices (structured programming,
+object-oriented programming, abstract data types, etc.) tends to be high.
+The intermediate model's cost drivers let you account for these differences.
+<p>
+For example, imagine that you're examining a fairly simple application that
+meets the "organic" requirements. Organic projects have a base factor
+of 2.3 and an effort exponent of 1.05, as noted above.
+We then examine all the factors to determine a corrected base factor.
+For this example, imagine
+that we determine the values of these cost drivers are as follows:<br>
+<br>
+<table cellpadding="2" cellspacing="2" border="1" width="100%">
+
+ <tbody>
+ <tr>
+ <td rowspan="1" colspan="2">Cost Drivers<br>
+ </td>
+ <td rowspan="1" colspan="2">Ratings<br>
+ </td>
+ </tr>
+ <tr>
+ <td>ID<br>
+ </td>
+ <td>Driver Name<br>
+ </td>
+ <td>Rating<br>
+ </td>
+ <td>Multiplier<br>
+ </td>
+ </tr>
+ <tr>
+ <td>RELY<br>
+ </td>
+ <td>Required software reliability<br>
+ </td>
+ <td>Low - easily recovered losses<br>
+ </td>
+ <td>0.88<br>
+ </td>
+ </tr>
+ <tr>
+ <td>DATA<br>
+ </td>
+ <td>Database size<br>
+ </td>
+ <td>Low<br>
+ </td>
+ <td>0.94<br>
+ </td>
+ </tr>
+ <tr>
+ <td>CPLX<br>
+ </td>
+ <td>Product complexity<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TIME<br>
+ </td>
+ <td>Execution time constraint<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>STOR<br>
+ </td>
+ <td>Main storage constraint<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>VIRT<br>
+ </td>
+ <td>Virtual machine (HW and OS) volatility<br>
+ </td>
+ <td>Low (major change every 12 months, minor every month)<br>
+ </td>
+ <td>0.87<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TURN<br>
+ </td>
+ <td>Computer turnaround time<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>ACAP<br>
+ </td>
+ <td>Analyst capability<br>
+ </td>
+ <td>Nominal (55th percentile)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>AEXP<br>
+ </td>
+ <td>Applications experience<br>
+ </td>
+ <td>Nominal (3 years)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>PCAP<br>
+ </td>
+ <td>Programmer capability<br>
+ </td>
+ <td>Nominal (55th percentile)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>VEXP<br>
+ </td>
+ <td>Virtual machine experience<br>
+ </td>
+ <td>High (3 years)<br>
+ </td>
+ <td>0.90<br>
+ </td>
+ </tr>
+ <tr>
+ <td>LEXP<br>
+ </td>
+ <td>Programming language experience<br>
+ </td>
+ <td>High (3 years)<br>
+ </td>
+ <td>0.95<br>
+ </td>
+ </tr>
+ <tr>
+ <td>MODP<br>
+ </td>
+ <td>Use of "modern" programming practices (e.g. structured programming)<br>
+ </td>
+ <td>High (Routine use)<br>
+ </td>
+ <td>0.82<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TOOL<br>
+ </td>
+ <td>Use of software tools<br>
+ </td>
+ <td>Nominal (basic tools)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>SCED<br>
+ </td>
+ <td>Required development schedule<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+
+
+
+
+ </tbody>
+</table>
+<p>
+So, starting with the base factor (2.3 in this case), we multiply
+it by each of the driver values in this example:<br>
+<pre>2.3*0.88*0.94*1*1*1*0.87*1.00*1*1*1*0.90*0.95*0.82*1*1</pre>
+For this
+example, the final factor for the effort calculation is 1.1605. You would then
+invoke sloccount with "--effort 1.1605 1.05" to pass in the corrected factor
+and exponent for the effort estimation.
+You don't need to use "--schedule" to set the factors when you're using
+the organic model, because in SLOCCount
+the default values are the values for the organic model.
+You can still set the scheduling parameters explicitly
+by adding "--schedule 2.5 0.38".
+You <i>do</i> need to use the --schedule option for
+embedded and semidetached projects, because those modes have different
+schedule parameters. The final command would be:
+<pre>
+ sloccount --effort 1.1605 1.05 --schedule 2.5 0.38 my_project
+</pre>
+<p>
+The detailed COCOMO model requires breaking information down further.
+<p>
+For more information about the original COCOMO model, including the detailed
+COCOMO model, see the book
+<i>Software Engineering Economics</i> by Barry Boehm.
+<p>
+You may be surprised by the high cost estimates, but remember,
+these include design, coding, testing (including
+integration testing), documentation (both for users
+and for programmers), and a wrap rate for corporate overhead
+(to cover facilities, equipment, accounting, and so on).
+Many programmers forget these other costs and are shocked by the high cost
+estimates.
+<p>
+If you want to know a subset of this cost, you'll need to isolate
+just those figures that you're trying to measure.
+For example, let's say you want to find the money a programmer would receive
+to do just the coding of the units of the program
+(ignoring wrap rate, design, testing, integration, and so on).
+According to Boehm's book (page 65, table 5-2),
+the percentage varies by product size.
+For effort, code and unit test takes 42% for small (2 KSLOC), 40% for
+intermediate (8 KSLOC), 38% for medium (32 KSLOC), and 36% for large
+(128 KSLOC).
+Sadly, Boehm doesn't separate coding from unit test; perhaps
+50% of the time is spent in unit test in traditional proprietary
+development (including fixing bugs found from unit test).
+If you want to know the income to the programmer (instead of cost to
+the company), you'll also want to remove the wrap rate.
+Thus, a programmer's income to <i>only</i> write the code for a
+small program (circa 2 KSLOC) would be 8.75% (42% x 50% x (1/2.4))
+of the default figure computed by SLOCCount.
+<p>
+In other words, less than one-tenth of the cost as computed by SLOCCount
+is what actually would be made by a programmer for a small program for
+just the coding task.
+Note that a proprietary commercial company that bid using
+this lower figure would rapidly go out of business, since this figure
+ignores the many other costs they have to incur to actually develop
+working products.
+Programs don't arrive out of thin air; someone needs to determine what
+the requirements are, how to design it, and perform at least
+some testing of it.
+<p>
+There's another later estimation model for effort and schedule
+called "COCOMO II", but COCOMO II requires logical SLOC instead
+of physical SLOC.
+SLOCCount doesn't currently measure logical SLOC, so
+SLOCCount doesn't currently use COCOMO II.
+Contributions of code to compute logical SLOC and then optionally
+use COCOMO II will be gratefully accepted.
+
+<h1><a name="specific-files">Counting Specific Files</a></h1>
+<p>
+If you want to count a specific subset, you can use the "--details"
+option to list individual files, pipe this into "grep" to select the
+files you're interested in, and pipe the result to
+my tool "print_sum" (which reads lines beginning with numbers, and
+returns the total of those numbers).
+If you've already done the analysis, an example would be:
+<pre>
+ sloccount --cached --details | grep "/some/subdirectory/" | print_sum
+</pre>
+<p>
+If you just want to count specific files, and you know what language
+they're in, you
+can just invoke the basic SLOC counters directly.
+By convention the simple counters are named "LANGUAGE_count",
+and they take on the command line a list of the
+source files to count.
+Here are some examples:
+<pre>
+ c_count *.c *.cpp *.h # Count C and C++ in current directory.
+ asm_count *.S # Count assembly.
+</pre>
+All the counter programs (*_count) accept a &quot;-f FILENAME&quot; option, where FILENAME
+is a file containing the names of all the source files to count
+(one file name per text line). If FILENAME is &quot;-&quot;, the
+list of file names is taken from the standard input
+(see the example after the list of counters below).
+The &quot;c_count&quot; program handles both C and C++ (but not Objective-C;
+for that, use objc_count).
+The available counters are
+ada_count,
+asm_count,
+awk_count,
+c_count,
+csh_count,
+exp_count,
+fortran_count,
+f90_count,
+java_count,
+lex_count,
+lisp_count,
+ml_count,
+modula3_count,
+objc_count,
+pascal_count,
+perl_count,
+python_count,
+sed_count,
+sh_count,
+sql_count, and
+tcl_count.
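+<p>
+For instance, here is one way to use the &quot;-f -&quot; form, feeding a file list
+from &quot;find&quot; through the standard input (the directory name and file pattern
+are just examples):
+<pre>
+ find src -name '*.py' | python_count -f -
+</pre>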
+<p>
+There is also "generic_count", which takes as its first parameter
+the ``comment string'', followed by a list of files.
+The comment string begins a comment that ends at the end of the line.
+Sometimes, if you have source for a language not listed, generic_count
+will be sufficient.
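+<p>
+For example (a hypothetical case), if you had files in a language whose line
+comments begin with "%%", you could count them directly with:
+<pre>
+ generic_count '%%' *.xyz    # ".xyz" is a made-up extension for this example
+</pre>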
+<p>
+The basic SLOC counters will send output to standard out, one line per file
+(showing the SLOC count and filename).
+The assembly counter shows some additional information about each file.
+The basic SLOC counters always complete their output with a line
+saying "Total:", followe by a line with the
+total SLOC count.
+
+<h1><a name="errors">Countering Problems and Handling Errors</a></h1>
+
+If you're analyzing unfamiliar code, there's always the possibility
+that it uses languages not processed by SLOCCount.
+To counter this, after running SLOCCount, run the following program:
+<pre>
+ count_unknown_ext
+</pre>
+This will look at the resulting data (in its default data directory
+location, ~/.slocdata) and report a sorted list of the file extensions
+for uncategorized ("unknown") files.
+The list will show every file extension and how many files had that
+extension, and is sorted by most common first.
+It's not a problem if an "unknown" type isn't a source code file, but
+if there are a significant number of source files in this category,
+you'll need to change SLOCCount to get an accurate result.
+
+<p>
+One error report that you may see is:
+<pre>
+ c_count ERROR - terminated in string in (filename)
+</pre>
+
+The cause of this is that c_count (the counter for C-like languages)
+keeps track of whether or not it's in a string; this error means that when
+the counter reached the end of the file, it still thought it was inside a string.
+
+<p>
+Note that c_count really does have to keep track of whether or
+not it's in a string.
+For example, this is three lines of code, not two, because the
+``comment'' is actually in string data:
+
+<pre>
+ a = "hello
+ /* this is not a comment */
+ bye";
+</pre>
+<p>
+Usually this error means you have code that won't compile
+given certain #define settings. For example, XFree86 has a line of code that's
+actually wrong (it has a string that's not terminated), but people
+don't notice because the #define to enable it is not usually set.
+Legitimate code can trigger this message, but code that triggers
+this message is horrendously formatted and is begging for problems.
+
+<p>
+In either case, the best way to handle the situation
+is to modify the source code (slightly) so that the code's intent is clear
+(by making sure that double-quotes balance).
+If it's your own code, you definitely should fix this anyway.
+You need to look at the double-quote (") characters. One approach is to
+just grep for double-quote, and look at every line for text that isn't
+terminated, e.g., printf("hello %s, myname);
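+<p>
+For example, you could list every line containing a double-quote, with line
+numbers, and scan for strings that aren't terminated (the filename here is
+just a placeholder):
+<pre>
+ grep -n '"' suspect_file.c | less
+</pre>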
+
+<p>
+SLOCCount reports warnings when an unusually
+large number of duplicate files are found.
+A large number of duplicates <i>may</i> suggest that you're counting
+two different versions of the same program as though they were
+independently developed.
+You may want to cd into the data directory (usually ~/.slocdata), cd into
+the child directories corresponding to each component, and then look
+at their dup_list.dat files, which list the filenames that appeared
+to be duplicated (and what they duplicate with).
+
+
+<h1><a name="adding">Adding Support for New Languages</a></h1>
+SLOCCount handles many languages, but if it doesn't support one you need,
+you'll need to give the language a standard (lowercase ASCII) name,
+then modify SLOCCount to (1) detect and (2) count code in that language.
+
+<ol>
+<li>
+To detect a new language, you'll need to modify the program break_filelist.
+If the filename extension is reliable, you can modify the array
+%file_extensions, which maps various filename extensions into languages.
+If your needs are more complex, you'll need to modify the code
+(typically in functions get_file_type or file_type_from_contents)
+so that the correct file type is determined.
+For example, if a file with a given filename extension is only
+<i>sometimes</i> that type, you'll need to write code to examine the
+file contents.
+<li>
+You'll need to create a SLOC counter for that language type.
+It must have the name XYZ_count, where XYZ is the standard name for the
+language.
+<p>
+For some languages, you may be able to use the ``generic_count'' program
+to implement your counter - generic_count takes as its first argument
+the pattern which
+marks the beginning of a comment (such comments continue until the end of the line);
+the other arguments are the files to count.
+Thus, the LISP counter looks like this:
+<pre>
+ #!/bin/sh
+ generic_count ';' $@
+</pre>
+The generic_count program won't work correctly if there are multiline comments
+(e.g., C) or multiline string constants.
+If your language is identical to C/C++'s syntax in terms of
+string constant definitions and commenting syntax
+(using // or /* .. */), then you can use the c_count program - in this case,
+modify compute_sloc_lang so that the c_count program is used.
+<p>
+Otherwise, you'll have to devise your own counting program.
+The program must generate output in the same format:
+for every filename passed as an argument, it must print a separate line
+giving the SLOC for that file, a space, and the filename.
+(Note: the assembly language counter produces a slightly different format.)
+After that, it must print "Total:" on its own line, and the actual SLOC total
+on the following (last) line (see the sketch after this list).
+</ol>
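+<p>
+To make the expected output format concrete, here is a sketch of what such a
+counter should print for two files; the filenames and counts are invented
+purely for illustration:
+<pre>
+ 87 /some/path/first.xyz
+ 13 /some/path/second.xyz
+ Total:
+ 100
+</pre>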
+
+<h1><a name="advanced">Advanced SLOCCount Use</a></h1>
+For most people, the previous information is enough.
+However, if you're measuring a large set of programs, or have unusual needs,
+those steps may not give you enough control.
+In that case, you may need to create your own "data directory"
+by hand and separately run the SLOCCount tools.
+Basically, "sloccount" (note the lower case) is the name for
+a high-level tool which invokes many other tools; this entire
+suite is named SLOCCount (note the mixed case).
+The next section will describe how to invoke the various tools "manually"
+so you can gain explicit control over the measuring process when
+the defaults are not to your liking, along with various suggestions
+for how to handle truly huge sets of data.
+<p>
+Here's how to manually create a "data directory" to hold
+intermediate results, and how to invoke each tool in sequence
+(with discussion of options); a condensed end-to-end example appears after this list:
+<ol>
+<li>Set your PATH to include the SLOCCount "bin directory", as discussed above.
+<li>Make an empty "data directory"
+(where all intermediate results will be stored);
+you can pick any name and location you like for this directory.
+Here, I'll use the name "data":
+<pre>
+ mkdir ~/data
+</pre>
+<li>Change your current directory to this "data directory":
+<pre>
+ cd ~/data
+</pre>
+The rest of these instructions assume that your current directory
+is the data directory.
+You can set up many different data directories if you wish, to analyze
+different source programs or analyze the programs in different ways;
+just "cd" to the one you want to work with.
+<li>(Optional) Some of the later steps will produce
+a lot of output while they're running.
+If you want to capture this information into a file, use the standard
+"script" command do to so.
+For example, "script run1" will save the output of everything you do into
+file "run1" (until you type control-D to stop saving the information).
+Don't forget that you're creating such a file, or it will become VERY large,
+and in particular don't type any passwords into such a session.
+You can store the script in the data directory, or create a subdirectory
+for such results - any data directory subdirectory that doesn't have the
+special file "filelist" is not a "data directory child" and is thus
+ignored by the later SLOCCount analysis routines.
+<li>Now initialize the "data directory".
+ In particular, initialization will create the "data directory children",
+ a set of subdirectories equivalent to the source code directory's
+ top directories. Each of these data directory children (subdirectories)
+ will contain a file named "filelist", which
+ lists all filenames in the corresponding source code directory.
+ These data directory children
+ will also eventually contain intermediate results
+ of analysis, which you can check for validity
+ (also, having a cache of these values speeds later analysis steps).
+ <p>
+ You use the "make_filelists" command to initialize a data directory.
+ For example, if your source code is in /usr/src/redhat/BUILD, run:
+<pre>
+ make_filelists /usr/src/redhat/BUILD/*
+</pre>
+<p>
+ Internally, make_filelists uses "find" to create the list of files, and
+ by default it ignores all symbolic links. However, you may need to
+ follow symbolic links; if you do, give make_filelists the
+ "--follow" option (which will use find's "-follow" option).
+ Here are make_filelists' options:
+<pre>
+ --follow Follow symbolic links
+ --datadir D Use this data directory
+ --skip S Skip basenames named S
+ --prefix P When creating children, prepend P to their name.
+ -- No more options
+</pre>
+<p>
+ Although you don't normally need to do so, if you want certain files to
+ not be counted at all in your analysis, you can remove
+ data directory children or edit the "filelist" files to do so.
+ There's normally no need to remove files which aren't source code files;
+ this is handled automatically by the next step.
+<p>
+ If you don't have a single source code directory where the subdirectories
+ represent the major components you want to count separately, you can
+ still use the tool but it's more work.
+ One solution is to create a "shadow" directory with the structure
+ you wish the program had, using symbolic links (you must use "--follow"
+ for this to work).
+ You can also just invoke make_filelists multiple times, with parameters
+ listing the various top-level directories you wish to include.
+ Note that the basenames of the directories must be unique.
+<p>
+ If there are so many directories (e.g., a massive number of projects)
+ that the command line is too long,
+ you can run make_filelists multiple times in the same
+ directory with different arguments to create them.
+ You may find "find" and/or "xargs" helpful in doing this automatically.
+ For example, here's how to do the same thing using "find":
+<pre>
+ find /usr/src/redhat/BUILD -maxdepth 1 -mindepth 1 -type d \
+ -exec make_filelists {} \;
+</pre>
+<li>Categorize each file.
+This means that we must determine which
+files contain source code (eliminating auto-generated and duplicate files),
+and of those files which language each file contains.
+The result will be a set of files in each subdirectory of the data directory,
+where each file represents a category (e.g., a language).
+<pre>
+ break_filelist *
+</pre>
+ At this point you might want to examine the data directory subdirectories
+ to ensure that "break_filelist" has correctly determined the types of
+ the various files.
+ In particular, the "unknown" category may have source files in a language
+ SLOCCount doesn't know about.
+ If the heuristics got some categorization wrong, you can modify the
+ break_filelist program and re-run break_filelist.
+<p>
+ By default break_filelist removes duplicates, doesn't count
+ automatically generated files as normal source code files, and
+ only gives some feedback. You can change these defaults with the
+ following options:
+<pre>
+ --duplicates Count all duplicate files as normal files
+ --crossdups Count duplicate files if they're in different data directory
+ children (i.e., in different "filelists")
+ --autogen Count automatically generated files
+ --verbose Present more verbose status information while processing.
+</pre>
+<p>
+ Duplicate control in particular is an issue; you probably don't want
+ duplicates counted, so that's the default.
+ Duplicate files are detected by determining if their MD5 checksums
+ are identical; the "first" duplicate encountered is the only one kept.
+ Normally, since shells sort directory names, this means that the
+ file in the alphabetically first child directory is the one counted.
+ You can change this around by listing directories in the sort order you
+ wish followed by "*"; if the same data directory child
+ is requested for analysis more
+ than once in a given execution, it's skipped after the first time.
+ So, if you want any duplicate files with child directory "glibc" to
+ count as part of "glibc", then you should provide the data directory children
+ list as "glibc *".
+<p>
+ Beware of choosing something other than "*" as the parameter here,
+ unless you use the "--duplicates" or "--crossdups" options.
+ The "*" represents the list of data directory children to examine.
+ Since break_filelist skips duplicate files identified
+ in a particular run, if you run break_filelist
+ on only certain children, some duplicate files won't be detected.
+ If you're allowing duplicates (via "--duplicates" or
+ "--crossdups"), then this isn't a problem.
+ Or, you can use the ``--duplistfile'' option to store and retrieve
+ hashes of files, so that additional files can be handled.
+<p>
+ If there are so many directories that the command line is too long,
+ you can run break_filelist multiple times and give it
+ a subset of the directories each time.
+ You'll need to use one of the duplicate control options to do this.
+ I would suggest using "--crossdups", which
+ means that duplicates inside a child will only be counted once,
+ eliminating at least some of the problems of duplicates.
+ Here's the equivalent of "break_filelist *" when there are a large
+ number of subdirectories:
+<pre>
+ find . -maxdepth 1 -mindepth 1 -type d -exec break_filelist --crossdups {} \;
+</pre>
+ Indeed, for all of the later commands where "*" is listed as the parameter
+ in these instructions
+ (for the list of data directory children), just run the above "find"
+ command and replace "break_filelist --crossdups" with the command shown.
+<li>(Optional)
+If you're not very familiar with the program you're analyzing, you
+might not be sure that "break_filelist" has correctly identified
+all of the files.
+In particular, the system might be using an unexpected
+programming language or extension not handled by SLOCCount.
+If this is your circumstance, you can just run the command:
+<pre>
+ count_unknown_ext
+</pre>
+(note that this command is unusual - it doesn't take any arguments,
+since it's hard to imagine a case where you wouldn't want every
+directory examined).
+Unlike the other commands discussed, this one specifically looks at
+${HOME}/.slocdata.
+This command presents a list of extensions which are unknown to break_filelist,
+with the most common ones listed first.
+The output format is a name, followed by the number of instances;
+the name begins with a "." if it's an extension, or, if there's no
+extension, it begins with "/" followed by the base name of the file.
+break_filelist already knows about common extensions such as ".gif" and ".png",
+as well as common filenames like "README".
+You can also view the contents of each of the data directory children's
+files to see if break_filelist has correctly categorized the files.
+<li>Now compute SLOC and filecounts for each language; you can compute for all
+ languages at once by calling:
+<pre>
+ compute_all *
+</pre>
+If you only want to compute SLOC for a specific language,
+you can invoke compute_sloc_lang, which takes as its first parameter
+the SLOCCount name of the language ("ansic" for C, "cpp" for C++,
+"ada" for Ada, "asm" for assembly), followed by the list
+of data directory children.
+Note that these names are a change from version 1.0, which
+called the master program "compute_all",
+and had "compute_*" programs for each language.
+<p>
+Notice the "*"; you can replace the "*" with just the list of
+data directory children (subdirectories) to compute, if you wish.
+Indeed, you'll notice that nearly all of the following commands take a
+list of data directory children as arguments; when you want all of them, use
+"*" (as shown in these instructions), otherwise, list the ones you want.
+<p>
+When you run compute_all or compute_sloc_lang, each data directory
+child (subdirectory)
+is consulted in turn for a list of the relevant files, and the
+SLOC results are placed in that data directory child.
+In each child,
+the file "LANGUAGE-outfile.dat" lists the information from the
+basic SLOC counters.
+That is, the outfile lists the SLOC and filename
+(the assembly outfile has additional information), and ends with
+a line saying "Total:" followed by a line showing the total SLOC of
+that language in that data directory child.
+The file "all-physical.sloc" has the final total SLOC for every language
+in that child directory (i.e., it's the last line of the outfile).
+<li>(Optional) If you want, you can also use USC's CodeCount.
+I've had trouble with these programs, so I don't do this normally.
+However, you're welcome to try - they support logical SLOC measures
+as well as physical ones (though not for most of the languages
+supported by SLOCCount).
+Sadly, they don't seem to compile in gcc without a lot of help, they
+used fixed-width buffers that make me nervous, and I found a
+number of bugs (e.g., it couldn't handle "/* text1 *//* text2 */" in
+C code, a format that's legal and used often in the Linux kernel).
+If you want to do this,
+modify the files compute_c_usc and compute_java_usc so they point to the
+right directories, and type:
+<pre>
+ compute_c_usc *
+</pre>
+<li>Now you can analyze the results. The main tool for
+presenting SLOCCount results is "get_sloc", e.g.:
+<pre>
+ get_sloc * | less
+</pre>
+The get_sloc program takes many options, including:
+<pre>
+ --filecount Display number of files instead of SLOC (SLOC is the default)
+ --wide Use "wide" format instead (tab-separated columns)
+ --nobreak Don't insert breaks in long lines
+ --sort X Sort by "X", where "X" is the name of a language
+ ("ansic", "cpp", "fortran", etc.), or "total".
+ By default, get_sloc sorts by "total".
+ --nosort Don't sort - just present results in order of directory
+ listing given.
+ --showother Show non-language totals (e.g., # duplicate files).
+ --oneprogram When computing effort, assume that all files are part of
+ a single program. By default, each subdirectory specified
+ is assumed to be a separate, independently-developed program.
+ --noheader Don't show the header
+ --nofooter Don't show the footer (the per-language values and totals)
+</pre>
+<p>
+Note that unlike the "sloccount" tool, get_sloc requires the current
+directory to be the data directory.
+<p>
+If you're displaying SLOC, get_sloc will also estimate the time it
+would take to develop the software using COCOMO (using its "basic" model).
+By default, this figure assumes that each of the major subdirectories was
+developed independently of the others;
+you can use "--oneprogram" to make the assumption that all files are
+part of the same program.
+The COCOMO model makes many other assumptions; see the paper at
+<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>
+for more information.
+<p>
+If you need to do more analysis, you might want to use the "--wide"
+option and send the data to another tool such as a spreadsheet
+(e.g., gnumeric) or RDBMS (e.g., PostgreSQL).
+Using the "--wide" option creates tab-separated data, which is easier to
+import.
+You may also want to use the "--noheader" and/or "--nofooter" options to
+simplify porting the data to another tool.
+<p>
+Note that in version 1.0, "get_sloc" was called "get_data".
+<p>
+If you have so many data directory children that you can't use "*"
+on the command line, get_sloc won't be as helpful.
+Feel free to patch get_sloc to add this capability (as another option),
+or use get_sloc_details (discussed next) to feed the data into another tool.
+<li>(Optional) If you just can't get the information you need from get_sloc,
+then you can get the raw results of everything and process the data
+yourself.
+I have a little tool to do this, called get_sloc_details.
+You invoke it in a similar manner:
+<pre>
+get_sloc_details *
+</pre>
+</ol>
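+<p>
+Putting the main steps together, here is a condensed end-to-end session
+(a sketch only; the source path is an example, and your options may differ):
+<pre>
+ mkdir ~/data; cd ~/data                    # create and enter the data directory
+ make_filelists /usr/src/redhat/BUILD/*     # one child per top-level directory
+ break_filelist *                           # categorize files by language
+ compute_all *                              # compute SLOC for each language
+ get_sloc * | less                          # view the report
+</pre>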
+
+<p>
+<h1><a name="designer-notes">Designer's Notes</a></h1>
+<p>
+Here are some ``designer's notes'' on how SLOCCount works,
+including what it can handle.
+<p>
+The program break_filelist
+has categories for each programming language it knows about,
+plus the special categories ``not'' (not a source code file),
+``auto'' (an automatically-generated file and thus not to be counted),
+``zero'' (a zero-length file),
+``dup'' (a duplicate of another file as determined by an md5 checksum),
+and
+``unknown'' (a file which doesn't seem to be a source code file
+nor any of these other categories).
+It's a good idea to examine
+the ``unknown'' items later, checking the common extensions
+to ensure you have not missed any common types of code.
+<p>
+The program break_filelist uses lots of heuristics to correctly
+categorize files.
+Here are a few notes about its heuristics:
+<ol>
+<li>
+break_filelist first checks for well-known extensions (such as .gif) that
+cannot be program files, and for a number of common generated filenames.
+<li>
+It then peeks at the first few lines for "#!" followed by a legal script
+name.
+Sometimes it looks further, for example, many Python programs
+invoke "env" and then use it to invoke python.
+<li>
+If that doesn't work, it uses the extension to try to determine the category.
+For a number of languages, the extension is not reliable, so for those
+languages it examines the file contents and uses a set of heuristics
+to determine if the file actually belongs to that category.
+<li>
+Detecting automatically generated files is not easy, and it's
+quite conceivable that it won't detect some automatically generated files.
+The first 15 lines are examined, to determine if any of them
+include at the beginning of the line (after spaces and
+possible comment markers) one of the following phrases (ignoring
+upper and lower case distinctions):
+``generated automatically'',
+``automatically generated'',
+``this is a generated file'',
+``generated with the (something) utility'',
+or ``do not edit''.
+<li>A number of filename conventions are used, too.
+For example,
+any ``configure'' file is presumed to be automatically generated if
+there's a ``configure.in'' file in the same directory.
+<li>
+To eliminate duplicates,
+the program keeps md5 checksums of each program file.
+Any given md5 checksum is only counted once.
+Build directories are processed alphabetically, so
+if the same file content is in both directories ``a'' and ``b'',
+it will be counted only once as being part of ``a'' unless you make
+other arrangements.
+Thus, some data directory children with names later in the alphabet may appear
+smaller than would make sense at first glance.
+It is very difficult to eliminate ``almost identical'' files
+(e.g., an older and newer version of the same code, included in two
+separate packages), because
+it is difficult to determine when two ``similar'' files are essentially
+the same file.
+Changes such as the use of pretty-printers and massive renaming of variables
+could make small changes seem large, while the small files
+might easily appear to be the ``same''.
+Thus, files with different contents are simply considered different.
+<li>
+If all else fails, the file is placed in the ``unknown'' category for
+later analysis.
+</ol>
+<p>
+One complicating factor is that I wished to separate C, C++, and
+Objective-C code, but a header file ending with
+``.h'' or ``.hpp'' could be any of these languages.
+In theory, ``.hpp'' is only C++, but I found that in practice this isn't true.
+I developed a number of heuristics to determine, for each file,
+what language a given header belonged to.
+For example, if a given directory has exactly one of these languages
+(ignoring header files),
+the header is assumed to belong to that category as well.
+Similarly, if there is a body file (e.g., ".c") that has the same name
+as the header file, then presumably the header file is of the same language.
+Finally, a header file with the keyword ``class'' is almost certainly not a
+C header file, but a C++ header file; otherwise it's assumed to
+be a C file.
+<p>
+None of the SLOC counters fully parse the source code; they just examine
+the code using simple text processing patterns to count the SLOC.
+In practice, by handling a number of special cases this seems to be fine.
+Here are some notes on some of the language counters;
+the language name is followed by common extensions in parentheses
+and the SLOCCount name of the language in brackets:
+<ol>
+<li>Ada (.ada, .ads, .adb) [ada]: Comments begin with "--".
+<li>Assembly (.s, .S, .asm) [asm]:
+Assembly languages vary greatly in the comment character they use,
+so my counter had to handle this variance.
+The assembly language counter (asm_count)
+first examines the file to determine if
+C-style ``/*'' comments and C preprocessor commands
+(e.g., ``#include'') are used.
+If both ``/*'' and ``*/'' are in the file, it's assumed that
+C-style comments are being used
+(since it is unlikely that <i>both</i> would be used
+as something else, say as string data, in the same assembly language file).
+Determining if a file used the C preprocessor was trickier, since
+many assembly files do use ``#'' as a comment character and some
+preprocessor directives are ordinary words that might be included
+in a human comment.
+The heuristic used is as follows: if #ifdef, #endif, or #include are used, the
+C preprocessor is used; or if at least three lines have either #define or #else,
+then the C preprocessor is used.
+No doubt other heuristics are possible, but this at least seems to produce
+reasonable results.
+The program then determines what the comment character is by identifying
+which punctuation mark (from a set of possible marks)
+is the most common non-space initial character on a line
+(ignoring ``/'' and ``#'' if C comments or preprocessor commands,
+respectively, are used).
+Once the comment character has been determined, and it's been determined
+if C-style comments are allowed, the lines of code
+are counted in the file.
+<li>awk (.awk) [awk]: Comments begin with "#".
+<li>C (.c) [ansic]: Both traditional C comments (/* .. */) and C++
+(//) style comments are supported.
+Although the older ANSI and ISO C standards didn't support // style
+comments, in practice many C programs have used them for some time, and
+the C99 standard includes them.
+The C counter understands multi-line strings, so
+comment characters (/* .. */ and //) are treated as data inside strings.
+Conversely, the counter knows that a double-quote character inside a
+comment does not begin a C/C++ string.
+<li>C++ (.C, .cpp, .cxx, .cc) [cpp]: The same counter is used for
+both C and C++.
+Note that break_filelist does try to separate C from C++ for purposes
+of accounting between them.
+<li>C# (.cs): The same counter is used as for C and C++.
+Note that there are no "header" filetypes in C#.
+<li>C shell (.csh) [csh]: Comments begin with "#".
+<li>COBOL (.cob, .cbl) [cobol]: SLOCCount
+detects if a "freeform" command has been given; until such a command is
+given, fixed format is assumed.
+In fixed format, comments have a "*" or "/" in column 7 or column 1;
+any line that's not a comment, and has a nonwhitespace character after column 7
+(the indicator area) is counted as a source line of code.
+In a freeform style, any line beginning with optional whitespace and
+then "*" or "/" is considered a comment; any noncomment line
+with a nonwhitespace character is counted as SLOC.
+<li>Expect (.exp) [exp]: Comments begin with "#".
+<li>Fortran 77 (.f, .f77, .F, .F77) [fortran]: Comment-only lines are lines
+where column 1 character = C, c, *, or !, or
+where ! is preceded only by white space.
+<li>Fortran 90 (.f90, .F90) [f90]: Comment-only lines are lines
+where ! is preceded only by white space.
+<li>Haskell (.hs) [haskell]:
+This counter handles block comments {- .. -} and single line comments (--);
+pragmas {-# .. -} are counted as SLOC.
+This is a simplistic counter,
+and can be fooled by certain unlikely combinations of block comments
+and other syntax (line-ending comments or strings).
+In particular, "Hello {-" will be incorrectly interpreted as a
+comment block begin, and "{- -- -}" will be incorrectly interpreted as a
+comment block begin without an end. Literate files are detected by
+their extension, and the style (TeX or plain text) is determined by
+searching for a \begin{code} or "&gt;" at the beginning of lines.
+See the <a
+ href="http://www.haskell.org/onlinereport/literate.html">Haskell 98
+ report section on literate Haskell</a> for more information.
+<li>Java (.java) [java]: Java is counted using the same counter as C and C++.
+<li>lex (.l) [lex]: Uses traditional C /* .. */ comments.
+Note that this does not use the same counter as C/C++ internally, since
+it's quite legal in lex to have "//" (where it is NOT a comment).
+<li>LISP (.cl, .el, .scm, .lsp, .jl) [lisp]: Comments begin with ";".
+<li>ML (.ml, .mli, .mll, .mly) [ml]: Comments nest and are enclosed in (* .. *).
+<li>Modula3 (.m3, .mg, .i3, .ig) [modula3]: Comments are enclosed in (* .. *).
+<li>Objective-C (.m) [objc]: Comments are old C-style /* .. */ comments.
+<li>Pascal (.p, .pas) [pascal]: Comments are enclosed in curly braces {}
+or (*..*). This counter has known weaknesses; see the BUGS section of
+the manual page for more information.
+<li>Perl (.pl, .pm, .perl) [perl]:
+Comments begin with "#".
+Perl permits in-line ``perlpod'' documents, ``here'' documents, and an
+__END__ marker that complicate code-counting.
+Perlpod documents are essentially comments, but a ``here'' document
+may include text to generate them (in which case the perlpod document
+is data and should be counted).
+The __END__ marker indicates the end of the file from Perl's
+viewpoint, even if there's more text afterwards.
+<li>PHP (.php, .php[3456], .inc) [php]:
+Code is counted as PHP code if it has a .php file extension;
+it's also counted if it has an .inc extension and looks like PHP code.
+SLOCCount does <b>not</b> count PHP code embedded in HTML files normally,
+though its lower-level routines can do so if you want to
+(use php_count to do this).
+Any of the various ways to begin PHP code can be used
+(&lt;? .. ?&gt;,
+&lt;?php .. ?&gt;,
+&lt;script language="php"&gt; .. &lt;/script&gt;,
+or even &lt;% .. %&gt;).
+Any of the PHP comment formats (C, C++, and shell) can be used, and
+any string constant formats ("here document", double quote, and single
+quote) can be used as well.
+<li>Python (.py) [python]:
+Comments begin with "#".
+Python has a convention that, at the beginning of a definition
+(e.g., of a function, method, or class), an unassigned string can be
+placed to describe what's being defined. Since this is essentially
+a comment (though it doesn't syntactically look like one), the counter
+avoids counting such strings, which may have multiple lines.
+To handle this,
+strings which start at the beginning of a line are not counted.
+Python also has the ``triple quote'' operator, permitting multiline
+strings; these need to be handled specially.
+Triple-quoted strings are normally considered data, regardless of
+content, unless they are used as a comment about a definition.
+<li>Ruby (.rb) [ruby]: Comments begin with "#".
+<li>sed (.sed) [sed]: Comments begin with "#".
+Note that these are "sed-only" files; many uses of sed are embedded in
+shell scripts (and are categorized as shell scripts in those cases).
+<li>shell (.sh) [sh]: Comments begin with "#".
+Note that I classify ksh, bash, and the original Bourne shell sh together,
+because they have very similar syntaxes.
+For example, in all of these shells,
+setting a variable is expressed as "varname=value",
+while C shells use "set varname=value".
+<li>TCL (.tcl, .tk, .itk) [tcl]: Comments begin with "#".
+<li>Yacc (.y) [yacc]: Yacc is counted using the same counter as C and C++.
+</ol>
+<p>
+Much of the code is written in Perl, since it's primarily a text processing
+problem and Perl is good at that.
+Many short scripts are Bourne shell scripts (it's good at
+short scripts for calling other programs), and the
+basic C/C++ SLOC counter is written in C for speed.
+<p>
+I originally named it "SLOC-Count", but I found that some web search
+engines (notably Google) treated that as two words.
+By naming it "SLOCCount", it's easier to find by those who know
+the name of the program.
+<p>
+SLOCCount only counts physical SLOC, not logical SLOC.
+Logical SLOC counting requires much more code to implement,
+and I needed to cover a large number of programming languages.
+
+
+<p>
+<h1><a name="sloc-definition">Definition of SLOC</a></h1>
+<p>
+This tool measures ``physical SLOC.''
+Physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+To make this concrete, here's an example of a simple C program
+(it strips ANSI C comments out).
+On the left side is the running SLOC total, where "-" indicates a line
+that is not considered a physical "source line of code":
+<pre>
+ 1 #include &lt;stdio.h&gt;
+ -
+ - /* peek at the next character in stdin, but don't get it */
+ 2 int peek() {
+ 3 int c = getchar();
+ 4 ungetc(c, stdin);
+ 5 return c;
+ 6 }
+ -
+ 7 main() {
+ 8 int c;
+ 9 int incomment = 0; /* 1 = we are inside a comment */
+ -
+10 while ( (c = getchar()) != EOF) {
+11 if (!incomment) {
+12 if ((c == '/') &amp;&amp; (peek() == '*')) {incomment=1;}
+13 } else {
+14 if ((c == '*') &amp;&amp; (peek() == '/')) {
+15 c= getchar(); c=getchar(); incomment=0;
+16 }
+17 }
+18 if ((c != EOF) &amp;&amp; !incomment) {putchar(c);}
+19 }
+20 }
+</pre>
+<p>
+<a href="http://www.sei.cmu.edu/publications/documents/92.reports/92.tr.020.html">Robert E. Park et al.'s
+<i>Software Size Measurement:
+A Framework for Counting Source Statements</i></a>
+(Technical Report CMU/SEI-92-TR-20)
+presents a set of issues to be decided when trying to count code.
+The paper's abstract states:
+<blockquote><i>
+This report presents guidelines for defining, recording, and reporting
+two frequently used measures of software size - physical source lines
+and logical source statements.
+We propose a general framework for constructing size
+definitions and use it to derive operational methods for
+reducing misunderstandings in measurement results.
+</i></blockquote>
+<p>
+Using Park's framework, here is how physical lines of code are counted:
+<ol>
+<li>Statement Type: I used a physical line-of-code as my basis.
+I included executable statements, declarations
+(e.g., data structure definitions), and compiler directives
+(e.g., preprocessor commands such as #define).
+I excluded all comments and blank lines.
+<li>How Produced:
+I included all programmed code, including any files that had been modified.
+I excluded code generated with source code generators, converted with
+automatic translators, and those copied or reused without change.
+If a file was in the source package, I included it; if the file had
+been removed from a source package (including via a patch), I did
+not include it.
+<li>Origin: You select the files (and thus their origin).
+<li>Usage: You select the files (and thus their usage), e.g.,
+you decide if you're going to
+include additional applications able to run on the system but not
+included with the system.
+<li>Delivery: You'll decide what code to include, but of course,
+if you don't have the code you can't count it.
+<li>Functionality: This tool will include both operative and inoperative code
+if they're mixed together.
+An example of intentionally ``inoperative'' code is
+code turned off by #ifdef commands; since it could be
+turned on for special purposes, it made sense to count it.
+An example of unintentionally ``inoperative'' code is dead or unused code.
+<li>Replications:
+Normally, duplicate files are ignored, unless you use
+the "--duplicates" or "--crossdups" option.
+The tool will count
+``physical replicates of master statements stored in
+the master code''.
+This is simply code cut and pasted from one place to another to reuse code;
+it's hard to tell where this happens, and since it has to be maintained
+separately, it's fair to include this in the measure.
+I excluded copies inserted, instantiated, or expanded when compiling
+or linking, and I excluded postproduction replicates
+(e.g., reparameterized systems).
+<li>Development Status: You'll decide what code
+should be included (and thus the development status of the code that
+you'll accept).
+<li>Languages: You can see the language list above.
+<li>Clarifications: I included all statement types.
+This included nulls, continues, no-ops, lone semicolons,
+statements that instantiate generics,
+lone curly braces ({ and }), and labels by themselves.
+</ol>
+<p>
+Thus, SLOCCount generally follows Park's ``basic definition'',
+but with the following exceptions depending on how you use it:
+<ol>
+<li>How Produced:
+By default, this tool excludes duplicate files and
+code generated with source code generators.
+After all, the COCOMO model states that the
+only code that should be counted is code
+``produced by project personnel'', whereas these kinds of files are
+instead the output of ``preprocessors and compilers.''
+If code is always maintained as the input to a code generator, and then
+the code generator is re-run, it's only the code generator input's size that
+validly measures the size of what is maintained.
+Note that while I attempted to exclude generated code, this exclusion
+is based on heuristics which may have missed some cases.
+If you want to count duplicate files, use the
+"--duplicates" and/or "--crossdups" options;
+if you want to count automatically generated files, use
+the "--autogen" option (see the example after this list).
+<li>Origin:
+You can choose what source code you'll measure.
+Normally physical SLOC doesn't include an unmodified
+``vendor-supplied language support library'' nor a
+``vendor-supplied system or utility''.
+However, if this is what you are measuring, then you need to include it.
+If you include such code, your measurement will differ from
+the usual ``basic definition.''
+<li>Functionality: I included counts of unintentionally inoperative code
+(e.g., dead or unused code).
+It is very difficult to automatically detect such code
+in general for many languages.
+For example, a program not directly invoked by anything else nor
+installed by the installer is much more likely to be a test program,
+which you may want to include in the count (you often would include it
+if you're estimating effort).
+Clearly, discerning human ``intent'' is hard to automate.
+</ol>
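+<p>
+For example, to count a source tree while keeping both duplicate and
+automatically generated files, you could type something like:
+<pre>
+ sloccount --duplicates --autogen <i>topmost-source-code-directory</i>
+</pre>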
+<p>
+Otherwise, this counter follows Park's
+``basic definition'' of a physical line of code, even down to
+the language-specific definitions where Park provided them.
+
+
+<p>
+<h1><a name="miscellaneous">Miscellaneous Notes</a></h1>
+<p>
+There are other undocumented analysis tools in the original tar file.
+Most of them are specialized scripts for my circumstances, but feel
+free to use them as you wish.
+<p>
+If you're packaging this program, don't just copy every executable
+into the system "bin" directory - many of the files are those
+specialized scripts.
+Just put in the bin directory every executable documented here, plus
+the files they depend on (there aren't that many).
+See the RPM specification file to see what's actually installed.
+<p>
+You have to take any measure of SLOC (including this one) with a
+large grain of salt.
+Physical SLOC is sensitive to the format of source code.
+There's a correlation between SLOC and development effort, and some
+correlation between SLOC and functionality,
+but there's absolutely no correlation between SLOC
+and either "quality" or "value".
+<p>
+Physical SLOC's sensitivity to formatting is a legitimate
+(and well-known) weakness of the measure.
+However, to be fair, logical SLOC is influenced by coding style too.
+For example, the following two phrases are semantically identical,
+but will have different logical SLOC values:
+<pre>
+ int i, j; /* 1 logical SLOC */
+
+ int i; /* 2 logical SLOC, but it does the same thing */
+ int j;
+</pre>
+<p>
+If you discover other information that can be divided up by
+data directory children (e.g., the license used), it's probably best
+to add that to each subdirectory (e.g., as a "license" file in the
+subdirectory).
+Then you can modify tools like get_sloc
+to add them to their display.
+<p>
+I developed SLOCCount for my own use, not originally as
+a community tool, so it's certainly not beautiful code.
+However, I think it's serviceable - I hope you find it useful.
+Please send me patches for any improvements you make!
+<p>
+You can't use this tool as-is with some estimation models, such as COCOMO II,
+because this tool doesn't compute logical SLOC.
+I certainly would accept code contributions to add the ability to
+measure logical SLOC (or related measures such as
+Cyclomatic Complexity and Cyclomatic density);
+selecting them could be a compile-time option.
+However, measuring logical SLOC takes more development effort, so I
+haven't done so; see USC's "CodeCount" for a set of code that
+measures logical SLOC for some languages
+(though I've had trouble with CodeCount - in particular, its C counter
+doesn't correctly handle large programs like the Linux kernel).
+
+
+<p>
+<h1><a name="license">SLOCCount License</a></h1>
+<p>
+Here is the SLOCCount License; the file COPYING contains the standard
+GPL version 2 license:
+<pre>
+=====================================================================
+SLOCCount
+Copyright (C) 2000-2001 David A. Wheeler (dwheeler, at, dwheeler.com)
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=====================================================================
+</pre>
+<p>
+While it's not formally required by the license, please give credit
+to me and this software in any report that uses results generated by it.
+<p>
+This document was written by David A. Wheeler (dwheeler, at, dwheeler.com),
+and is
+(C) Copyright 2001 David A. Wheeler.
+This document is covered by the license (GPL) listed above.
+<p>
+The license <i>does</i> give you the right to
+use SLOCCount to analyze proprietary programs.
+
+<p>
+<h1><a name="related-tools">Related Tools</a></h1>
+<p>
+One available toolset is
+<a href="http://sunset.usc.edu/research/CODECOUNT">CodeCount</a>.
+I tried using this toolset, but I eventually gave up.
+It had too many problems handling the code I was trying to analyze, and it
+does a poor job automatically categorizing code.
+It also has no support for many of today's languages (such as Python,
+Perl, Ruby, PHP, and so on).
+However, it does a lot of analysis and measurements that SLOCCount
+doesn't do, so it all depends on your need.
+Its license appeared to be open source, but it's quite unusual and
+I'm not enough of a lawyer to be able to confirm that.
+<p>
+Another tool that's available is <a href="http://csdl.ics.hawaii.edu/Research/LOCC/LOCC.html">LOCC</a>.
+It's available under the GPL.
+It can count Java code, and there's experimental support for C++.
+LOCC is really intended for more deeply analyzing each Java file;
+what's particularly interesting about it is that it can measure
+"diffs" (how much has changed).
+See
+<a href="http://csdl.ics.hawaii.edu/Publications/MasterList.html#csdl2-00-10">
+A comparative review of LOCC and CodeCount</a>.
+<p>
+<a href="http://sourceforge.net/projects/cccc">
+CCCC</a> is a tool which analyzes C++ and Java files
+and generates a report on various metrics of the code.
+Metrics supported include lines of code, McCabe's complexity,
+and metrics proposed by Chidamber &amp; Kemerer and Henry &amp; Kafura.
+(You can see
+<a href="http://cccc.sourceforge.net/">Time Littlefair's comments</a>).
+CCCC is in the public domain.
+It reports on metrics that sloccount doesn't, but sloccount can handle
+far more computer languages.
+
+<p>
+<h1><a name="submitting-changes">Submitting Changes</a></h1>
+<p>
+The GPL license doesn't require you to submit changes you make back to
+its maintainer (currently me),
+but it's highly recommended and wise to do so.
+Because others <i>will</i> send changes to me, a version you make on your
+own will slowly become obsolete and incompatible.
+Rather than allowing this to happen, it's better to send changes in to me
+so that the latest version of SLOCCount also has the
+features you're looking for.
+If you're submitting support for new languages, be sure that your
+change correctly ignores files that aren't in that new language
+(some filename extensions have multiple meanings).
+You might want to look at the <a href="TODO">TODO</a> file first.
+<p>
+When you send changes to me, send them as "diff" results so that I can
+use the "patch" program to install them.
+If you can, please send ``unified diffs'' -- GNU's diff can create these
+using the "-u" option.
+</body>
+
diff --git a/sloccount.html.orig b/sloccount.html.orig
new file mode 100644
index 0000000..dd0ad54
--- /dev/null
+++ b/sloccount.html.orig
@@ -0,0 +1,2440 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<title>SLOCCount User's Guide</title>
+</head>
+<body bgcolor="#FFFFFF">
+<center>
+<font size="+3"><b><span class="title">SLOCCount User's Guide</span></b></font>
+<br>
+<font size="+2"><span class="author">by David A. Wheeler (dwheeler, at, dwheeler.com)</span></font>
+<br>
+<font size="+2"><span class="pubdate">December 2, 2002</span></font>
+<br>
+<font size="+2"><span class="version">Version 2.20</span></font>
+</center>
+<p>
+<h1><a name="introduction">Introduction</a></h1>
+<p>
+SLOCCount (pronounced "sloc-count") is a suite of programs for counting
+physical source lines of code (SLOC) in potentially large software systems.
+Thus, SLOCCount is a "software metrics tool" or "software measurement tool".
+SLOCCount was developed by David A. Wheeler,
+originally to count SLOC in a GNU/Linux distribution, but it can be
+used for counting the SLOC of arbitrary software systems.
+<p>
+SLOCCount is known to work on Linux systems, and has been tested
+on Red Hat Linux versions 6.2, 7, and 7.1.
+SLOCCount should run on many other Unix-like systems (if Perl is installed),
+in particular, I would expect a *BSD system to work well.
+Windows users can run sloccount by first installing
+<a href="http://sources.redhat.com/cygwin">Cygwin</a>.
+SLOCCount is much slower on Windows/Cygwin, and it's not as easy to install
+or use on Windows, but it works.
+Of course, feel free to upgrade to an open source Unix-like system
+(such as Linux or *BSD) instead :-).
+<p>
+SLOCCount can count physical SLOC for a wide number of languages.
+Listed alphabetically, they are
+Ada, Assembly (for many machines and assemblers),
+awk (including gawk and nawk),
+Bourne shell (and relatives such as bash, ksh, zsh, and pdksh),
+C, C++, C# (also called C-sharp or cs), C shell (including tcsh),
+COBOL, Expect, Fortran, Haskell,
+Java, lex (including flex),
+LISP (including Scheme),
+makefiles (though they aren't usually shown in final reports),
+Modula3, Objective-C, Pascal, Perl, PHP, Python, Ruby, sed,
+SQL (normally not shown),
+TCL, and Yacc.
+It can gracefully handle awkward situations in many languages,
+for example, it can determine the
+syntax used in different assembly language files and adjust appropriately,
+it knows about Python's use of string constants as comments, and it
+can handle various Perl oddities (e.g., perlpods, here documents,
+and Perl's _&nbsp;_END_&nbsp;_ marker).
+It even has a "generic" SLOC counter that you may be able to use to count the
+SLOC of other languages (depending on the language's syntax).
+<p>
+SLOCCount can also take a large list of files and automatically categorize
+them using a number of different heuristics.
+The heuristics automatically determine if a file
+is a source code file or not, and if so, which language it's written in.
+For example,
+it knows that ".pc" is usually a C source file for an Oracle preprocessor,
+but it can detect many circumstances where it's actually a file about
+a "PC" (personal computer).
+For another example, it knows that ".m" is the standard extension for
+Objective-C, but it will check the file contents to
+see if it really is Objective-C.
+It will even examine file headers to attempt to accurately determine
+the file's true type.
+As a result, you can analyze large systems completely automatically.
+<p>
+Finally, SLOCCount has some report-generating tools
+to collect the data generated,
+and then present it in several different formats and sorted different ways.
+The report-generating tool can also generate simple tab-separated files
+so data can be passed on to other analysis tools (such as spreadsheets
+and database systems).
+<p>
+SLOCCount will try to quickly estimate development time and effort given only
+the lines of code it computes, using the original Basic COCOMO model.
+This estimate can be improved if you can give more information about the project.
+See the
+<a href="#cocomo">discussion below about COCOMO, including intermediate COCOMO</a>,
+if you want to improve the estimates by giving additional information about
+the project.
+<p>
+SLOCCount is open source software/free software (OSS/FS),
+released under the GNU General Public License (GPL), version 2;
+see the <a href="#license">license below</a>.
+The master web site for SLOCCount is
+<a href="http://www.dwheeler.com/sloccount">http://www.dwheeler.com/sloccount</a>.
+You can learn a lot about SLOCCount by reading the paper that caused its
+creation, available at
+<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>.
+Feel free to see my master web site at
+<a href="http://www.dwheeler.com">http://www.dwheeler.com</a>, which has
+other material such as the
+<a href="http://www.dwheeler.com/secure-programs"><i>Secure Programming
+for Linux and Unix HOWTO</i></a>,
+my <a href="http://www.dwheeler.com/oss_fs_refs.html">list of
+OSS/FS references</a>, and my paper
+<a href="http://www.dwheeler.com/oss_fs_why.html"><i>Why OSS/FS? Look at
+the Numbers!</i></a>
+Please send improvements by email
+to dwheeler, at, dwheeler.com (DO NOT SEND SPAM - please remove the
+commas, remove the spaces, and change the word "at" into the at symbol).
+<p>
+The following sections first give a "quick start"
+(discussing how to use SLOCCount once it's installed),
+discuss basic SLOCCount concepts,
+how to install it, how to set your PATH,
+how to install source code on RPM-based systems if you wish, and
+more information on how to use the "sloccount" front-end.
+This is followed by material for advanced users:
+how to use SLOCCount tools individually (for when you want more control
+than the "sloccount" tool gives you), designer's notes,
+the definition of SLOC, and miscellaneous notes.
+The last sections state the license used (GPL) and give
+hints on how to submit changes to SLOCCount (if you decide to make changes
+to the program).
+
+
+<p>
+<h1><a name="quick-start">Quick Start</a></h1>
+<p>
+Once you've installed SLOCCount (discussed below),
+you can measure an arbitrary program by typing everything
+after the dollar sign into a terminal session:
+<pre>
+ $ sloccount <i>topmost-source-code-directory</i>
+</pre>
+<p>
+The directory listed and all its descendants will be examined.
+You'll see output while it calculates,
+culminating with physical SLOC totals and
+estimates of development time, schedule, and cost.
+If the directory contains a set of directories, each of which is
+a different project developed independently,
+use the "--multiproject" option so the effort estimations
+can correctly take this into account.
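+For example, if the directory holds several independently-developed projects,
+you'd type something like:
+<pre>
+ $ sloccount --multiproject <i>topmost-source-code-directory</i>
+</pre>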
+<p>
+You can redisplay the data different ways by using the "--cached"
+option, which skips the calculation stage and re-prints previously
+computed information.
+You can use other options to control what's displayed:
+"--filecount" shows counts of files instead of SLOC, and
+"--details" shows the detailed information about every source code file.
+So, to display all the details of every file once you've previously
+calculated the results, just type:
+<pre>
+ sloccount --cached --details
+</pre>
+<p>
+You'll notice that the default output ends with a request.
+If you use this data (e.g., in a report), please
+credit that data as being "generated using 'SLOCCount' by David A. Wheeler."
+I make no money from this program, so at least please give me some credit.
+<p>
+SLOCCount tries to ignore all automatically generated files, but its
+heuristics to detect this are necessarily imperfect (after all, even humans
+sometimes have trouble determining if a file was automatically generated).
+If possible, try to clean out automatically generated files from
+the source directories --
+in many situations "make clean" does this.
+<p>
+There's more to SLOCCount than this, but first we'll need to
+explain some basic concepts, then we'll discuss other options
+and advanced uses of SLOCCount.
+
+<p>
+<h1><a name="concepts">Basic Concepts</a></h1>
+<p>
+SLOCCount counts physical SLOC, also called "non-blank, non-comment lines".
+More formally, physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+In SLOCCount, there are 3 different directories:
+<ol>
+<li>The "source code directory", a directory containing the source code
+ being measured
+ (possibly in recursive subdirectories). The directories immediately
+ contained in the source code directory will normally be counted separately,
+ so it helps if your system is designed so that this top set of directories
+ roughly represents the system's major components.
+ If it doesn't, there are various tricks you can use to group source
+ code into components, but it's more work.
+ You don't need write access to the source code directory, but
+ you do need read access to all files, and read and search (execute) access
+ to all subdirectories.
+<li>The "bin directory", the directory containing the SLOCCount executables.
+ By default, installing the program creates a subdirectory
+ named "sloccount-VERSION" which is the bin directory.
+ The bin directory must be part of your PATH.
+<li>The "data directory", which stores the analysis results.
+ When measuring programs using "sloccount", by default
+ this is the directory ".slocdata" inside your home directory.
+ When you use the advanced SLOCCount tools directly,
+ in many cases this must be your "current" directory.
+ Inside the data directory are "data directory children" - these are
+ subdirectories that contain a file named "filelist", and each child
+ is used to represent a different project or a different
+ major component of a project.
+</ol>
+<p>
+SLOCCount can handle many different programming languages, and separate
+them by type (so you can compare the use of each).
+Here is the set of languages, sorted alphabetically;
+common filename extensions are in
+parentheses, with SLOCCount's ``standard name'' for the language
+listed in brackets:
+<ol>
+<li>Ada (.ada, .ads, .adb, .pad) [ada]
+<li>Assembly for many machines and assemblers (.s, .S, .asm) [asm]
+<li>awk (.awk) [awk]
+<li>Bourne shell and relatives such as bash, ksh, zsh, and pdksh (.sh) [sh]
+<li>C (.c, .pc, .ec, .ecp) [ansic]
+<li>C++ (.C, .cpp, .cxx, .cc, .pcc) [cpp]
+<li>C# (.cs) [cs]
+<li>C shell including tcsh (.csh) [csh]
+<li>COBOL (.cob, .cbl, .COB, .CBL) [cobol]
+<li>Expect (.exp) [exp]
+<li>Fortran (.f, .f77, .F) [fortran]
+<li>Haskell (.hs) [haskell]; please preprocess .lhs files.
+<li>Java (.java) [java]
+<li>lex (.l) [lex]
+<li>LISP including Scheme (.el, .scm, .lsp, .jl) [lisp]
+<li>makefiles (makefile) [makefile]
+<li>ML (.ml, .ml3) [ml]
+<li>Modula3 (.m3, .i3) [modula3]
+<li>Objective-C (.m) [objc]
+<li>Pascal (.p, .pas) [pascal]
+<li>Perl (.pl, .pm, .perl) [perl]
+<li>PHP (.php, .php[3456], .inc) [php]
+<li>Python (.py) [python]
+<li>Ruby (.rb) [ruby]
+<li>sed (.sed) [sed]
+<li>sql (.sql) [sql]
+<li>TCL (.tcl, .tk, .itk) [tcl]
+<li>Yacc (.y) [yacc]
+</ol>
+
+<p>
+<h1><a name="installing">Installing SLOCCount</a></h1>
+<p>
+Obviously, before using SLOCCount you'll need to install it.
+SLOCCount depends on other programs, in particular perl, bash,
+a C compiler (gcc will do), and md5sum
+(you can get a useful md5sum program in the ``textutils'' package
+on many Unix-like systems), so you'll need to get them installed
+if they aren't already.
+<p>
+If your system uses RPM version 4 or greater to install software
+(e.g., Red Hat Linux 7 or later), just download the SLOCCount RPM
+and install it using a normal installation command; from the text line
+you can use:
+<pre>
+ rpm -Uvh sloccount*.rpm
+</pre>
+<p>
+Everyone else will need to install from a tar file, and Windows users will
+have to install Cygwin before installing sloccount.
+<p>
+If you're using Windows, you'll need to first install
+<a href="http://sources.redhat.com/cygwin">Cygwin</a>.
+By installing Cygwin, you'll install an environment and a set of
+open source Unix-like tools.
+Cygwin essentially creates a Unix-like environment in which sloccount can run.
+You may be able to run parts of sloccount without Cygwin, in particular,
+the perl programs should run in the Windows port of Perl, but you're
+on your own - many of the sloccount components expect a Unix-like environment.
+If you want to install Cygwin, go to the
+<a href="http://sources.redhat.com/cygwin">Cygwin main page</a>
+and install it.
+If you're using Cygwin, <b>install it to use Unix newlines, not
+DOS newlines</b> - DOS newlines will cause odd errors in SLOCCount
+(and probably other programs, too).
+I have only tested a "full" Cygwin installation, so I suggest installing
+everything.
+If you're short on disk space, at least install
+binutils, bash, fileutils, findutils,
+gcc, grep, gzip, make, man, perl, readline,
+sed, sh-utils, tar, textutils, unzip, and zlib;
+you should probably install vim as well,
+and there may be other dependencies too.
+By default Cygwin will create a directory C:\cygwin\home\NAME,
+and will set up the ability to run Unix programs
+(which will think that the same directory is called /home/NAME).
+Now double-click on the Cygwin icon, or select
+Programs / Cygnus Solutions / Cygwin Bash shell from the Start menu;
+you'll see a terminal screen with a Unix-like interface.
+Now follow the instructions (next) for tar file users.
+<p>
+If you're installing from the tar file, download the file
+(into your home directory is fine).
+Unpacking the file will create a subdirectory, so if you want the
+unpacked subdirectory to go somewhere special, "cd" to where you
+want it to go.
+Most likely, your home directory is just fine.
+Now gunzip and untar SLOCCount (the * replaces the version #) by typing
+this at a terminal session:
+<pre>
+ gunzip -c sloccount*.tar.gz | tar xvf -
+</pre>
+Replace "sloccount*.tar.gz" shown above
+with the full path of the downloaded file, wherever that is.
+You've now created the "bin directory", which is simply the
+"sloccount-VERSION" subdirectory created by the tar command
+(where VERSION is the version number).
+<p>
+Now you need to compile the few compiled programs in the "bin directory" so
+SLOCCount will be ready to go.
+First, cd into the newly-created bin directory, by typing:
+<pre>
+ cd sloccount*
+</pre>
+<p>
+You may then need to override some installation settings.
+You can do this by editing the supplied makefile, or alternatively,
+by providing options to "make" whenever you run make.
+The supplied makefile assumes your C compiler is named "gcc", which
+is true for most Linux systems, *BSD systems, and Windows systems using Cygwin.
+If this isn't true, you'll need to set
+the "CC" variable to the correct value (e.g., "cc").
+You can also modify where the files are stored; this variable is
+called PREFIX and its default is /usr/local
+(older versions of sloccount defaulted to /usr).
+<p>
+If you're using Windows and Cygwin, you
+<b>must</b> override one of the installation
+settings, EXE_SUFFIX, for installation to work correctly.
+One way to set this value is to edit the "makefile" file so that
+the line beginning with "EXE_SUFFIX" reads as follows:
+<pre>
+ EXE_SUFFIX=.exe
+</pre>
+If you're using Cygwin and you choose to modify the "makefile", you
+can use any text editor on the Cygwin side, or you can use a
+Windows text editor if it can read and write Unix-formatted text files.
+Cygwin users are free to use vim, for example.
+If you're installing into your home directory and using the default locations,
+Windows text editors will see the makefile as file
+C:\cygwin\home\NAME\sloccount-VERSION\makefile.
+Note that the Windows "Notepad" application doesn't work well, because it's not
+able to handle Unix text files correctly.
+Since this can be quite a pain, Cygwin users may instead decide to
+override the makefile values during installation, as described next.
+<p>
+Finally, compile the few compiled programs in it by typing "make":
+<pre>
+ make
+</pre>
+If you didn't edit the makefile in the previous step, you
+need to provide the options on each make invocation to set the correct values.
+This is done by simply saying (after "make") the name of the variable,
+an equal sign, and its correct value.
+Thus, to compile the program on a Windows system using Cygwin, you can
+skip modifying the makefile by typing this instead of just "make":
+<pre>
+ make EXE_SUFFIX=.exe
+</pre>
+<p>
+If you want, you can install sloccount for system-wide use without
+using the RPM version.
+Windows users using Cygwin should probably do this, particularly
+if they chose a "local" installation.
+To do this, first log in as root (Cygwin users don't need to do this
+for local installation).
+Edit the makefile to match your system's conventions, if necessary,
+and then type "make install":
+<pre>
+ make install
+</pre>
+If you need to set some make options, remember to do that here too.
+If you use "make install", you can uninstall it later using
+"make uninstall".
+Installing sloccount for system-wide use is optional;
+SLOCCount works without a system-wide installation.
+However, if you don't install sloccount system-wide, you'll need to
+set up your PATH variable; see the section on
+<a href="#path">setting your path</a>.
+<p>
+A note for Cygwin users (and some others): some systems, including Cygwin,
+don't set up the environment quite right and thus can't display the manual
+pages as installed.
+The problem is that they forget to search /usr/local/share/man for
+manual pages.
+If you want to read the installed manual pages, type this
+into a Bourne-like shell:
+<pre>
+ MANPATH=/usr/local/share/man:/usr/share/man:/usr/man
+ export MANPATH
+</pre>
+Or, if you use a C shell:
+<pre>
+ setenv MANPATH "/usr/local/share/man:/usr/share/man:/usr/man"
+</pre>
+From then on, you'll be able to view the reference manual pages
+by typing "man sloccount" (or by using whatever manual page display system
+you prefer).
+<p>
+
+<p>
+<h1><a name="installing-source">Installing The Source Code To Measure</a></h1>
+<p>
+Obviously, you must install the software source code you're counting,
+so somehow you must create the "source directory"
+with the source code to measure.
+You must also make sure that permissions are set so the software can
+read these directories and files.
+<p>
+For example, if you're trying to count the SLOC for an RPM-based Linux system,
+install the software source code by doing the following as root
+(which will place all source code into the source directory
+/usr/src/redhat/BUILD):
+<ol>
+<li>Install all source rpm's:
+<pre>
+ mount /mnt/cdrom
+ cd /mnt/cdrom/SRPMS
+ rpm -ivh *.src.rpm
+</pre>
+<li>Remove RPM spec files you don't want to count:
+<pre>
+ cd ../SPECS
+ (look in contents of spec files, removing what you don't want)
+</pre>
+<li>Build/prep all spec files:
+<pre>
+ rpm -bp *.spec
+</pre>
+<li>Set permissions so the source files can be read by all:
+<pre>
+ chmod -R a+rX /usr/src/redhat/BUILD
+</pre>
+</ol>
+<p>
+Here's an example of how to download source code from an
+anonymous CVS server.
+Let's say you want to examine the source code in GNOME's "gnome-core"
+directory, as stored at the CVS server "anoncvs.gnome.org".
+Here's how you'd do that:
+<ol>
+<li>Set up site and login parameters:
+<pre>
+ export CVSROOT=':pserver:anonymous@anoncvs.gnome.org:/cvs/gnome'
+</pre>
+<li>Log in:
+<pre>
+ cvs login
+</pre>
+<li>Check out the software (copy it to your local directory), using
+mild compression to save on bandwidth:
+<pre>
+ cvs -z3 checkout gnome-core
+</pre>
+</ol>
+<p>
+Of course, if you have a non-anonymous account, you'd set CVSROOT
+to reflect this. For example, to log in using the "pserver"
+protocol as ACCOUNT_NAME, do:
+<pre>
+ export CVSROOT=':pserver:ACCOUNT_NAME@cvs.gnome.org:/cvs/gnome'
+</pre>
+<p>
+You may need root privileges to install the source code and to give
+another user permission to read it, but please avoid running the
+sloccount program as root.
+Although I know of no specific reason this would be a problem,
+running any program as root turns off helpful safeguards.
+<p>
+Although SLOCCount tries to detect (and ignore) many cases where
+programs are automatically generated, these heuristics are necessarily
+imperfect.
+So, please don't run any programs that generate other programs - just
+do enough to get the source code prepared for counting.
+In general you shouldn't run "make" on the source code, and if you have,
+consider running "make clean" or "make really_clean" on the source code first.
+It often doesn't make any difference, but identifying those circumstances
+is difficult.
+<p>
+SLOCCount will <b>not</b> automatically uncompress files that are
+compressed/archive files (such as .zip, .tar, or .tgz files).
+Often such files are just "left over" old versions or files
+that you're already counting.
+If you want to count the contents of compressed files, uncompress them first.
+<p>
+SLOCCount also doesn't delve into files using "literate programming"
+techniques, in part because there are too many incompatible formats
+that implement it.
+Thus, run the tools to extract the code from the literate programming files
+before running SLOCCount.
+For example, if you have many literate Haskell files (.lhs), please
+extract them.
+
+
+<h1><a name="path">Setting your PATH</a></h1>
+Before you can run SLOCCount, you'll need to make sure
+the SLOCCount "bin directory" is in your PATH.
+If you've installed SLOCCount in a system-wide location
+such as /usr/bin, then you needn't do more; the RPMs and "make install"
+commands essentially do this.
+<p>
+Otherwise, in Bourne-shell variants, type:
+<pre>
+ PATH="$PATH:<i>the directory with SLOCCount's executable files</i>"
+ export PATH
+</pre>
+Csh users should instead type:
+<pre>
+ setenv PATH "$PATH:<i>the directory with SLOCCount's executable files</i>"
+</pre>
+
+<h1><a name="using-basics">Using SLOCCount: The Basics</a></h1>
+
+Normal use of SLOCCount is very simple.
+In a terminal window just type "sloccount", followed by a
+list of the source code directories to count.
+If you give it only a single directory, SLOCCount tries to be
+a little clever and break the source code into
+subdirectories for purposes of reporting:
+<ol>
+<li>If the directory has at least
+two subdirectories, then those subdirectories will be used as the
+breakdown (see the example below).
+<li>If the single directory contains files as well as directories
+(or if you give sloccount some files as parameters), those files will
+be assigned to the directory "top_dir" so you can tell them apart
+from other directories.
+<li>If there's a subdirectory named "src", then that subdirectory is again
+broken down, with all the further subdirectories prefixed with "src_".
+So if directory "X" has a subdirectory "src", which contains subdirectory
+"modules", the program will report a separate count from "src_modules".
+</ol>
+In the terminology discussed above, each of these directories would become
+"data directory children."
+<p>
+You can also give "sloccount" a list of directories, in which case the
+report will be broken down by these directories
+(make sure that the basenames of these directories differ).
+SLOCCount normally considers all descendants of these directories,
+though unless told otherwise it ignores symbolic links.
+<p>
+This is all easier to explain by example.
+Let's say that we want to measure Apache 1.3.12 as installed using an RPM.
+Once it's installed, we just type:
+<pre>
+ sloccount /usr/src/redhat/BUILD/apache_1.3.12
+</pre>
+The output we'll see shows status reports while it analyzes things,
+and then it prints out:
+
+<pre>
+SLOC Directory SLOC-by-Language (Sorted)
+24728 src_modules ansic=24728
+19067 src_main ansic=19067
+8011 src_lib ansic=8011
+5501 src_os ansic=5340,sh=106,cpp=55
+3886 src_support ansic=2046,perl=1712,sh=128
+3823 src_top_dir sh=3812,ansic=11
+3788 src_include ansic=3788
+3469 src_regex ansic=3407,sh=62
+2783 src_ap ansic=2783
+1378 src_helpers sh=1345,perl=23,ansic=10
+1304 top_dir sh=1304
+104 htdocs perl=104
+31 cgi-bin sh=24,perl=7
+0 icons (none)
+0 conf (none)
+0 logs (none)
+
+
+ansic: 69191 (88.85%)
+sh: 6781 (8.71%)
+perl: 1846 (2.37%)
+cpp: 55 (0.07%)
+
+
+Total Physical Source Lines of Code (SLOC) = 77873
+Estimated Development Effort in Person-Years (Person-Months) = 19.36 (232.36)
+ (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
+Estimated Schedule in Years (Months) = 1.65 (19.82)
+ (Basic COCOMO model, Months = 2.5 * (person-months**0.38))
+Estimated Average Number of Developers (Effort/Schedule) = 11.72
+Total Estimated Cost to Develop = $ 2615760
+ (average salary = $56286/year, overhead = 2.4).
+
+Please credit this data as "generated using 'SLOCCount' by David A. Wheeler."
+</pre>
+<p>
+Interpreting this should be straightforward.
+The Apache directory has several subdirectories, including "htdocs", "cgi-bin",
+and "src".
+The "src" directory has many subdirectories in it
+("modules", "main", and so on).
+Code files directly
+contained in the main directory /usr/src/redhat/BUILD/apache_1.3.12
+are labelled "top_dir", while
+code directly contained in the src subdirectory is labelled "src_top_dir".
+Code in the "src/modules" directory is labelled "src_modules" here.
+The output shows each major directory broken
+out, sorted from largest to smallest.
+Thus, the "src/modules" directory had the most code of the directories,
+24728 physical SLOC, all of it in C.
+The "src/helpers" directory had a mix of shell, perl, and C; note that
+when multiple languages are shown, the list of languages in that child
+is also sorted from largest to smallest.
+<p>
+Below the per-component set is a list of all languages used,
+with their total SLOC shown, sorted from most to least.
+After this is the total physical SLOC (77,873 physical SLOC in this case).
+<p>
+Next is an estimation of the effort and schedule (calendar time)
+it would take to develop this code.
+For effort, the units shown are person-years (with person-months
+shown in parentheses); for schedule, total years are shown first
+(with months in parentheses).
+When invoked through "sloccount", the default assumption is that all code is
+part of a single program; the "--multiproject" option changes this
+to assume that all top-level components are independently developed
+programs.
+When "--multiproject" is invoked, each project's efforts are estimated
+separately (and then summed), and the schedule estimate presented
+is the largest estimated schedule of any single component.
+<p>
+By default the "Basic COCOMO" model is used for estimating
+effort and schedule; this model
+includes design, code, test, and documentation time (both
+user/admin documentation and development documentation).
+<a href="#cocomo">See below for more information on COCOMO</a>
+as it's used in this program.
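+If you want to check these estimates by hand, the Basic COCOMO formulas
+shown in the output above are easy to evaluate; for instance, using the
+standard Unix calculator "bc" (assuming it's installed on your system),
+the Apache figures above can be reproduced like this:
+<pre>
+ echo "2.4 * e(1.05 * l(77.873))" | bc -l    # effort: about 232.36 person-months
+ echo "2.5 * e(0.38 * l(232.36))" | bc -l    # schedule: about 19.82 months
+</pre>
+(The "-l" option loads bc's math library, where "l" is the natural logarithm
+and "e" is the exponential function.)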
+<p>
+Next are several numbers that attempt to estimate what it would have cost
+to develop this program.
+This is simply the amount of effort, multiplied by the average annual
+salary and by the "overhead multiplier".
+The default annual salary is
+$56,286; this value comes from
+<i>ComputerWorld</i>'s September 4, 2000 Salary Survey, which reported it
+as the average U.S. programmer/analyst salary in the year 2000.
+You might consider using other numbers
+(<i>ComputerWorld</i>'s September 3, 2001 Salary Survey found
+U.S. programmer/analysts averaging $55,100, senior
+systems programmers averaging $68,900, and senior systems analysts averaging
+$72,300).
+
+<p>
+Overhead is much harder to estimate; I did not find a definitive source
+for information on overheads.
+After informal discussions with several cost analysts,
+I determined that an overhead of 2.4
+would be representative of the overhead sustained by
+a typical software development company.
+As discussed in the next section, you can change these numbers too.
+
+<p>
+You may be surprised by the high cost estimates, but remember,
+these include design, coding, testing, documentation (both for users
+and for programmers), and a wrap rate for corporate overhead
+(to cover facilities, equipment, accounting, and so on).
+Many programmers forget these other costs and are shocked by the high figures.
+If you only want to know the cost of the coding itself, you'll need to
+isolate just that portion of the estimate (discussed further below).
+
+
+<p>
+Note that if any top-level directory has a file named PROGRAM_LICENSE,
+that file is assumed to contain the name of the license
+(e.g., "GPL", "LGPL", "MIT", "BSD", "MPL", and so on).
+If there is at least one such file, sloccount will also report statistics
+on licenses.
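+<p>
+For example, assuming a hypothetical top-level component directory named
+"mylib", you could record its license like this:
+<pre>
+ echo "GPL" &gt; mylib/PROGRAM_LICENSE
+</pre>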
+
+
+<h1><a name="options">Options</a></h1>
+The program "sloccount" has a large number of options
+so you can control what is selected for counting and how the
+results are displayed.
+<p>
+There are several options that control which files are selected
+for counting:
+<pre>
+ --duplicates Count all duplicate files as normal files
+ --crossdups Count duplicate files if they're in different data directory
+ children.
+ --autogen Count automatically generated files
+ --follow Follow symbolic links (normally they're ignored)
+ --addlang Add languages to be counted that normally aren't shown.
+ --append Add more files to the data directory
+</pre>
+Normally, files which have exactly the same content are counted only once
+(data directory children are processed alphabetically, so the child that
+comes first alphabetically is considered the owner of the master copy).
+If you want them all counted, use "--duplicates".
+Sometimes when you use sloccount, each directory represents a different
+project, in which case you might want to specify "--crossdups".
+The program tries to reject files that are automatically generated
+(e.g., a C file generated by bison), but you can disable this as well.
+You can use "--addlang" to show makefiles and SQL files, which aren't
+usually counted.
+<p>
+Possibly the most important option is "--cached".
+Normally, when sloccount runs, it computes a lot of information and
+stores this data in a "data directory" (by default, "~/.slocdata").
+The "--cached" option tells sloccount to use data previously computed,
+greatly speeding up use once you've done the computation once.
+The "--cached" option can't be used along with the options used to
+select what files should be counted.
+You can also select a different data directory by using the
+"--datadir" option.
+<p>
+There are many options for controlling the output:
+<pre>
+ --filecount Show counts of files instead of SLOC.
+ --details Present details: present one line per source code file.
+ --wide Show "wide" format. Ignored if "--details" selected
+ --multiproject Assume each directory is for a different project
+ (this modifies the effort estimation calculations)
+ --effort F E Change the effort estimation model, so that it uses
+ F as the factor and E as the exponent.
+ --schedule F E Change the schedule estimation model, so that it uses
+ F as the factor and E as the exponent.
+ --personcost P Change the average annual salary to P.
+ --overhead O Change the annual overhead to O.
+ -- End of options
+</pre>
+<p>
+Basically, the first time you use sloccount, if you're measuring
+a set of projects (not a single project) you might consider
+using "--crossdups" instead of the defaults.
+Then, you can redisplay data quickly by using "--cached",
+combining it with options such as "--filecount".
+If you want to send the data to another tool, use "--details".
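+For example, a first pass over a set of projects, followed by a quick
+redisplay of the results, might look like this:
+<pre>
+ sloccount --crossdups <i>topmost-source-code-directory</i>
+ sloccount --cached --filecount
+</pre>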
+<p>
+If you're measuring a set of projects, you probably ought to pass
+the option "--multiproject".
+When "--multiproject" is used, efforts are computed for each component
+separately and summed, and the time estimate used is the maximum
+single estimated time.
+<p>
+The "--details" option dumps the available data in 4 columns,
+tab-separated, where each line
+represents a source code file in the data directory children identified.
+The first column is the SLOC, the second column is the language type,
+the third column is the name of the data directory child
+(as it was given to get_sloc_details),
+and the last column is the absolute pathname of the source code file.
+You can then pipe this output to "sort" or some other tool for further
+analysis (such as a spreadsheet or RDBMS).
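+For example, here's one way (a sketch) to list the ten largest source files
+from a previous analysis, sorting numerically on the first column (the SLOC count):
+<pre>
+ sloccount --cached --details | sort -rn | head -10
+</pre>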
+<p>
+You can change the parameters used to estimate effort using "--effort".
+For example, if you believe that in the environment being used
+you can produce 2 KSLOC/month scaling linearly, then
+that means that the factor for effort you should use is 1/2 = 0.5 month/KSLOC,
+and the exponent for effort is 1 (linear).
+Thus, you can use "--effort 0.5 1".
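+In full, such an invocation would look like this:
+<pre>
+ sloccount --effort 0.5 1 <i>topmost-source-code-directory</i>
+</pre>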
+<p>
+You can also set the annual salary and overheads used to compute
+estimated development cost.
+While "$" is shown, there's no reason you have to use dollars;
+the unit of development cost is the same unit as the unit used for
+"--personcost".
+
+<h1><a name="cocomo">More about COCOMO</a></h1>
+
+<p>
+By default SLOCCount uses a very simple estimating model for effort and schedule:
+the basic COCOMO model in the "organic" mode (modes are more fully discussed below).
+This model estimates effort and schedule, including design, code, test,
+and documentation time (both user/admin documentation and development documentation).
+Basic COCOMO is a nice simple model, and it's used as the default because
+it doesn't require any information about the code other than the SLOC count
+already computed.
+<p>
+However, basic COCOMO's accuracy is limited for the same reason -
+basic COCOMO doesn't take a number of important factors into account.
+At the very least, you can quickly check that the right "mode" is
+being used, which improves accuracy on its own.
+If you have more information, you can also
+use the "Intermediate COCOMO" and "Detailed COCOMO" models, which take more
+factors into account and are likely to produce more accurate estimates as
+a result. Take these estimates as just that - estimates - they're not grand truths.
+Once you've worked out the corrected factors,
+pass this additional information to sloccount using its
+"--effort" and "--schedule" options (as discussed in the
+<a href="#options">options</a> section).
+<p>
+To use the COCOMO model, you first need to determine your application's
+mode, which can be "organic", "semidetached", or "embedded".
+Most software is "organic" (which is why it's the default).
+Here are simple definitions of these modes:
+<ul>
+<li>Organic: Relatively small software teams develop software in a highly
+familiar, in-house environment. &nbsp;It has a generally stable development
+environment, minimal need for innovative algorithms, and requirements can
+be relaxed to avoid extensive rework.</li>
+<li>Semidetached: This is an intermediate
+step between organic and embedded. This is generally characterized by reduced
+flexibility in the requirements.</li>
+<li>Embedded: The project must operate
+within tight (hard-to-meet) constraints, and requirements
+and interface specifications are often non-negotiable.
+The software will be embedded in a complex environment that the
+software must deal with as-is.</li>
+</ul>
+By default, SLOCCount uses the basic COCOMO model in the organic mode.
+For the basic COCOMO model, here are the critical factors for --effort and --schedule:<br>
+<ul>
+<li>Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li>
+<li>Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li>
+<li>Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li>
+</ul>
+Thus, if you want to use SLOCCount but the project is actually semidetached,
+you can use the options "--effort 3.0 1.12 --schedule 2.5 0.35"
+to get a more accurate estimate.
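+For example, an embedded-mode project would be measured with something like:
+<pre>
+ sloccount --effort 3.6 1.20 --schedule 2.5 0.32 <i>topmost-source-code-directory</i>
+</pre>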
+<br>
+For more accurate estimates, you can use the intermediate COCOMO models.
+For intermediate COCOMO, use the following figures:<br>
+<ul>
+ <li>Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li>
+ <li>Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li>
+ <li>Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32</li>
+</ul>
+The intermediate COCOMO values for schedule are exactly the same as the basic
+COCOMO model; the starting effort values are not quite the same, as noted
+in Boehm's book. However, in the intermediate COCOMO model, you don't
+normally use the effort base factors as-is; you adjust them with various
+corrective factors (called cost drivers). To use these corrections, you consider
+all the cost drivers, determine what best describes them,
+and multiply their corrective values by the effort base factor.
+The result is the final effort factor.
+Here are the cost drivers (from Boehm's book, table 8-2 and 8-3):
+
+<table cellpadding="2" cellspacing="2" border="1" width="100%">
+ <tbody>
+ <tr>
+ <th rowspan="1" colspan="2">Cost Drivers
+ </th>
+ <th rowspan="1" colspan="6">Ratings
+ </th>
+ </tr>
+ <tr>
+ <th>ID
+ </th>
+ <th>Driver Name
+ </th>
+ <th>Very Low
+ </th>
+ <th>Low
+ </th>
+ <th>Nominal
+ </th>
+ <th>High
+ </th>
+ <th>Very High
+ </th>
+ <th>Extra High
+ </th>
+ </tr>
+ <tr>
+ <td>RELY
+ </td>
+ <td>Required software reliability
+ </td>
+ <td>0.75 (effect is slight inconvenience)
+ </td>
+ <td>0.88 (easily recovered losses)
+ </td>
+ <td>1.00 (recoverable losses)
+ </td>
+ <td>1.15 (high financial loss)
+ </td>
+ <td>1.40 (risk to human life)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>DATA
+ </td>
+ <td>Database size
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>0.94 (database bytes/SLOC &lt; 10)
+ </td>
+ <td>1.00 (D/S between 10 and 100)
+ </td>
+ <td>1.08 (D/S between 100 and 1000)
+ </td>
+ <td>1.16 (D/S &gt; 1000)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>CPLX
+ </td>
+ <td>Product complexity
+ </td>
+ <td>0.70 (mostly straightline code, simple arrays, simple expressions)
+ </td>
+ <td>0.85
+ </td>
+ <td>1.00
+ </td>
+ <td>1.15
+ </td>
+ <td>1.30
+ </td>
+ <td>1.65 (microcode, multiple resource scheduling, device timing dependent coding)
+ </td>
+ </tr>
+ <tr>
+ <td>TIME
+ </td>
+ <td>Execution time constraint
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>1.00 (&lt;50% use of available execution time)
+ </td>
+ <td>1.11 (70% use)
+ </td>
+ <td>1.30 (85% use)
+ </td>
+ <td>1.66 (95% use)
+ </td>
+ </tr>
+ <tr>
+ <td>STOR
+ </td>
+ <td>Main storage constraint
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>1.00&nbsp;(&lt;50% use of available storage)</td>
+ <td>1.06 (70% use)
+ </td>
+ <td>1.21 (85% use)
+ </td>
+ <td>1.56 (95% use)
+ </td>
+ </tr>
+ <tr>
+ <td>VIRT
+ </td>
+ <td>Virtual machine (HW and OS) volatility
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>0.87 (major change every 12 months, minor every month)
+ </td>
+ <td>1.00 (major change every 6 months, minor every 2 weeks)</td>
+ <td>1.15 (major change every 2 months, minor changes every week)
+ </td>
+ <td>1.30 (major changes every 2 weeks, minor changes every 2 days)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>TURN
+ </td>
+ <td>Computer turnaround time
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>0.87 (interactive)
+ </td>
+ <td>1.00 (average turnaround &lt; 4 hours)
+ </td>
+ <td>1.07
+ </td>
+ <td>1.15
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>ACAP
+ </td>
+ <td>Analyst capability
+ </td>
+ <td>1.46 (15th percentile)
+ </td>
+ <td>1.19 (35th percentile)
+ </td>
+ <td>1.00 (55th percentile)
+ </td>
+ <td>0.86 (75th percentile)
+ </td>
+ <td>0.71 (90th percentile)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>AEXP
+ </td>
+ <td>Applications experience
+ </td>
+ <td>1.29 (&lt;= 4 months experience)
+ </td>
+ <td>1.13 (1 year)
+ </td>
+ <td>1.00 (3 years)
+ </td>
+ <td>0.91 (6 years)
+ </td>
+ <td>0.82 (12 years)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>PCAP
+ </td>
+ <td>Programmer capability
+ </td>
+ <td>1.42 (15th percentile)
+ </td>
+ <td>1.17 (35th percentile)
+ </td>
+ <td>1.00 (55th percentile)
+ </td>
+ <td>0.86 (75th percentile)
+ </td>
+ <td>0.70 (90th percentile)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>VEXP
+ </td>
+ <td>Virtual machine experience
+ </td>
+ <td>1.21 (&lt;= 1 month experience)
+ </td>
+ <td>1.10 (4 months)
+ </td>
+ <td>1.00 (1 year)
+ </td>
+ <td>0.90 (3 years)
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>LEXP
+ </td>
+ <td>Programming language experience
+ </td>
+ <td>1.14 (&lt;= 1 month experience)
+ </td>
+ <td>1.07 (4 months)
+ </td>
+ <td>1.00 (1 year)
+ </td>
+ <td>0.95 (3 years)
+ </td>
+ <td>&nbsp;
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>MODP
+ </td>
+ <td>Use of "modern" programming practices (e.g. structured programming)
+ </td>
+ <td>1.24 (No use)
+ </td>
+ <td>1.10
+ </td>
+ <td>1.00 (some use)
+ </td>
+ <td>0.91
+ </td>
+ <td>0.82 (routine use)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>TOOL
+ </td>
+ <td>Use of software tools
+ </td>
+ <td>1.24
+ </td>
+ <td>1.10
+ </td>
+ <td>1.00 (basic tools)
+ </td>
+ <td>0.91 (test tools)
+ </td>
+ <td>0.83 (requirements, design, management, documentation tools)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ <tr>
+ <td>SCED
+ </td>
+ <td>Required development schedule
+ </td>
+ <td>1.23 (75% of nominal)
+ </td>
+ <td>1.08 (85% of nominal)
+ </td>
+ <td>1.00 (nominal)
+ </td>
+ <td>1.04 (130% of nominal)
+ </td>
+ <td>1.10 (160% of nominal)
+ </td>
+ <td>&nbsp;
+ </td>
+ </tr>
+ </tbody>
+</table>
+<br>
+<br>
+<br>
+So, once all of the factors have been multiplied together, you can
+then use the "--effort" flag to set more accurate factors and exponents.
+Note that some factors will probably not be "nominal" simply because
+times have changed since COCOMO was originally developed; a few practices
+that were once rare and desirable have become commonplace today.
+For example,
+for many software projects of today, virtual machine volatility tends to
+be low, and the
+use of "modern" programming practices (structured programming,
+object-oriented programming, abstract data types, etc.) tends to be high.
+COCOMO automatically handles these differences.
+<p>
+For example, imagine that you're examining a fairly simple application that
+meets the "organic" requirements. Organic projects have a base factor
+of 2.3 and exponents of 1.05, as noted above.
+We then examine all the factors to determine a corrected base factor.
+For this example, imagine
+that we determine the values of these cost drivers are as follows:<br>
+<br>
+<table cellpadding="2" cellspacing="2" border="1" width="100%">
+
+ <tbody>
+ <tr>
+ <td rowspan="1" colspan="2">Cost Drivers<br>
+ </td>
+ <td rowspan="1" colspan="2">Ratings<br>
+ </td>
+ </tr>
+ <tr>
+ <td>ID<br>
+ </td>
+ <td>Driver Name<br>
+ </td>
+ <td>Rating<br>
+ </td>
+ <td>Multiplier<br>
+ </td>
+ </tr>
+ <tr>
+ <td>RELY<br>
+ </td>
+ <td>Required software reliability<br>
+ </td>
+ <td>Low - easily recovered losses<br>
+ </td>
+ <td>0.88<br>
+ </td>
+ </tr>
+ <tr>
+ <td>DATA<br>
+ </td>
+ <td>Database size<br>
+ </td>
+ <td>Low<br>
+ </td>
+ <td>0.94<br>
+ </td>
+ </tr>
+ <tr>
+ <td>CPLX<br>
+ </td>
+ <td>Product complexity<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TIME<br>
+ </td>
+ <td>Execution time constraint<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>STOR<br>
+ </td>
+ <td>Main storage constraint<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>VIRT<br>
+ </td>
+ <td>Virtual machine (HW and OS) volatility<br>
+ </td>
+ <td>Low (major change every 12 months, minor every month)<br>
+ </td>
+ <td>0.87<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TURN<br>
+ </td>
+ <td>Computer turnaround time<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>ACAP<br>
+ </td>
+ <td>Analyst capability<br>
+ </td>
+ <td>Nominal (55th percentile)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>AEXP<br>
+ </td>
+ <td>Applications experience<br>
+ </td>
+ <td>Nominal (3 years)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>PCAP<br>
+ </td>
+ <td>Programmer capability<br>
+ </td>
+ <td>Nominal (55th percentile)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>VEXP<br>
+ </td>
+ <td>Virtual machine experience<br>
+ </td>
+ <td>High (3 years)<br>
+ </td>
+ <td>0.90<br>
+ </td>
+ </tr>
+ <tr>
+ <td>LEXP<br>
+ </td>
+ <td>Programming language experience<br>
+ </td>
+ <td>High (3 years)<br>
+ </td>
+ <td>0.95<br>
+ </td>
+ </tr>
+ <tr>
+ <td>MODP<br>
+ </td>
+ <td>Use of "modern" programming practices (e.g. structured programming)<br>
+ </td>
+ <td>High (Routine use)<br>
+ </td>
+ <td>0.82<br>
+ </td>
+ </tr>
+ <tr>
+ <td>TOOL<br>
+ </td>
+ <td>Use of software tools<br>
+ </td>
+ <td>Nominal (basic tools)<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td>SCED<br>
+ </td>
+ <td>Required development schedule<br>
+ </td>
+ <td>Nominal<br>
+ </td>
+ <td>1.00<br>
+ </td>
+ </tr>
+
+
+
+
+ </tbody>
+</table>
+<p>
+So, starting with the base factor (2.3 in this case) and multiplying it
+by the driver values determined above, we compute:<br>
+<pre>2.3*0.88*0.94*1*1*1*0.87*1.00*1*1*1*0.90*0.95*0.82*1*1</pre>
+For this
+example, the final factor for the effort calculation is 1.1605. You would then
+invoke sloccount with "--effort 1.1605 1.05" to pass in the corrected factor
+and exponent for the effort estimation.
+You don't need to use "--schedule" to set the factors when you're using
+the organic model, because in SLOCCount
+the default values are the values for the organic model.
+You can set the scheduling parameters manually
+anyway by adding "--schedule 2.5 0.38".
+You <i>do</i> need to use the --schedule option for
+embedded and semidetached projects, because those modes have different
+schedule parameters. The final command would be:
+<pre>
+ sloccount --effort 1.1605 1.05 --schedule 2.5 0.38 my_project
+</pre>
+<p>
+The detailed COCOMO model requires breaking information down further.
+<p>
+For more information about the original COCOMO model, including the detailed
+COCOMO model, see the book
+<i>Software Engineering Economics</i> by Barry Boehm.
+<p>
+You may be surprised by the high cost estimates, but remember,
+these include design, coding, testing (including
+integration testing), documentation (both for users
+and for programmers), and a wrap rate for corporate overhead
+(to cover facilities, equipment, accounting, and so on).
+Many programmers forget these other costs and are shocked by the high cost
+estimates.
+<p>
+If you want to know a subset of this cost, you'll need to isolate
+just those figures that you're trying to measure.
+For example, let's say you want to find the money a programmer would receive
+to do just the coding of the units of the program
+(ignoring wrap rate, design, testing, integration, and so on).
+According to Boehm's book (page 65, table 5-2),
+the percentage varies by product size.
+For effort, code and unit test takes 42% for small (2 KSLOC), 40% for
+intermediate (8 KSLOC), 38% for medium (32 KSLOC), and 36% for large
+(128 KSLOC).
+Sadly, Boehm doesn't separate coding from unit test; perhaps
+50% of the time is spent in unit test in traditional proprietary
+development (including fixing bugs found from unit test).
+If you want to know the income to the programmer (instead of cost to
+the company), you'll also want to remove the wrap rate.
+Thus, a programmer's income to <i>only</i> write the code for a
+small program (circa 2 KSLOC) would be 8.75% (42% x 50% x (1/2.4))
+of the default figure computed by SLOCCount.
+<p>
+In other words, less than one-tenth of the cost as computed by SLOCCount
+is what actually would be made by a programmer for a small program for
+just the coding task.
+Note that a proprietary commercial company that bid using
+this lower figure would rapidly go out of business, since this figure
+ignores the many other costs they have to incur to actually develop
+working products.
+Programs don't arrive out of thin air; someone needs to determine what
+the requirements are, how to design it, and perform at least
+some testing of it.
+<p>
+There's another later estimation model for effort and schedule
+called "COCOMO II", but COCOMO II requires logical SLOC instead
+of physical SLOC.
+SLOCCount doesn't currently measure logical SLOC, so
+SLOCCount doesn't currently use COCOMO II.
+Contributions of code to compute logical SLOC and then optionally
+use COCOMO II will be gratefully accepted.
+
+<h1><a name="specific-files">Counting Specific Files</a></h1>
+<p>
+If you want to count a specific subset, you can use the "--details"
+option to list individual files, pipe this into "grep" to select the
+files you're interested in, and pipe the result to
+my tool "print_sum" (which reads lines beginning with numbers, and
+returns the total of those numbers).
+If you've already done the analysis, an example would be:
+<pre>
+ sloccount --cached --details | grep "/some/subdirectory/" | print_sum
+</pre>
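+Each detail line also includes the SLOCCount language name, so you can total
+a single language the same way (the language name "ansic" here is just an
+example):
+<pre>
+  sloccount --cached --details | grep -w "ansic" | print_sum
+</pre>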
+<p>
+If you just want to count specific files, and you know what language
+they're in, you
+can just invoke the basic SLOC counters directly.
+By convention the simple counters are named "LANGUAGE_count",
+and they take on the command line a list of the
+source files to count.
+Here are some examples:
+<pre>
+ c_count *.c *.cpp *.h # Count C and C++ in current directory.
+ asm_count *.S # Count assembly.
+</pre>
+All the counter (*_count) programs accept a &quot;-f FILENAME&quot; option, where FILENAME
+is a file containing the names of the source files to count
+(one filename per line). If FILENAME is &quot;-&quot;, the
+list of file names is taken from standard input.
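+For example, to count all the Python files under a directory tree using a
+file list (the directory name &quot;src&quot; and the list filename are only
+illustrative):
+<pre>
+  find src -name '*.py' -print &gt; pyfiles.txt
+  python_count -f pyfiles.txt
+
+  find src -name '*.py' -print | python_count -f -   # or read the list from stdin
+</pre>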
+The &quot;c_count&quot; program handles both C and C++ (but not objective-C;
+for that use objc_count).
+The available counters are
+ada_count,
+asm_count,
+awk_count,
+c_count,
+csh_count,
+exp_count,
+fortran_count,
+java_count,
+lex_count,
+lisp_count,
+ml_count,
+modula3_count,
+objc_count,
+pascal_count,
+perl_count,
+python_count,
+sed_count,
+sh_count,
+sql_count, and
+tcl_count.
+<p>
+There is also "generic_count", which takes as its first parameter
+the ``comment string'', followed by a list of files.
+The comment string begins a comment that ends at the end of the line.
+Sometimes, if you have source for a language not listed, generic_count
+will be sufficient.
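+For example, for a hypothetical language whose comments run from "!" to the
+end of the line (the extension ".xyz" is made up), you could run:
+<pre>
+  generic_count '!' *.xyz
+</pre>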
+<p>
+The basic SLOC counters will send output to standard out, one line per file
+(showing the SLOC count and filename).
+The assembly counter shows some additional information about each file.
+The basic SLOC counters always complete their output with a line
+saying "Total:", followe by a line with the
+total SLOC count.
+
+<h1><a name="errors">Countering Problems and Handling Errors</a></h1>
+
+If you're analyzing unfamiliar code, there's always the possibility
+that it uses languages not processed by SLOCCount.
+To counter this, after running SLOCCount, run the following program:
+<pre>
+ count_unknown_ext
+</pre>
+This will look at the resulting data (in its default data directory
+location, ~/.slocdata) and report a sorted list of the file extensions
+for uncategorized ("unknown") files.
+The list will show every file extension and how many files had that
+extension, and is sorted by most common first.
+It's not a problem if an "unknown" type isn't a source code file, but
+if there are a significant number of source files in this category,
+you'll need to change SLOCCount to get an accurate result.
+
+<p>
+One error report that you may see is:
+<pre>
+ c_count ERROR - terminated in string in (filename)
+</pre>
+
+The cause of this is that c_count (the counter for C-like languages)
+keeps track of whether or not it's in a string; this error means that when
+the counter reached the end of the file, it still thought it was in a string.
+
+<p>
+Note that c_count really does have to keep track of whether or
+not it's in a string.
+For example, this is three lines of code, not two, because the
+``comment'' is actually in string data:
+
+<pre>
+ a = "hello
+ /* this is not a comment */
+ bye";
+</pre>
+<p>
+Usually this error means you have code that won't compile
+given certain #define settings. For example, XFree86 has a line of code that's
+actually wrong (it has a string that's not terminated), but people
+don't notice because the #define to enable it is not usually set.
+Legitimate code can trigger this message, but code that triggers
+this message is horrendously formatted and is begging for problems.
+
+<p>
+In either case, the best way to handle the situation
+is to modify the source code (slightly) so that the code's intent is clear
+(by making sure that double-quotes balance).
+If it's your own code, you definitely should fix this anyway.
+You need to look at the double-quote (") characters. One approach is to
+just grep for double-quote, and look at every line for text that isn't
+terminated, e.g., printf("hello %s, myname);
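+For example, the following shows every line containing a double-quote, with
+line numbers, so you can scan for an unterminated string (replace the filename
+with the one named in the error message):
+<pre>
+  grep -n '"' the_flagged_file.c | less
+</pre>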
+
+<p>
+SLOCCount reports warnings when an unusually
+large number of duplicate files are reported.
+A large number of duplicates <i>may</i> suggest that you're counting
+two different versions of the same program as though they were
+independently developed.
+You may want to cd into the data directory (usually ~/.slocdata), cd into
+the child directories corresponding to each component, and then look
+at their dup_list.dat files, which list the filenames that appeared
+to be duplicated (and what they duplicate with).
+
+
+<h1><a name="adding">Adding Support for New Languages</a></h1>
+SLOCCount handles many languages, but if it doesn't support one you need,
+you'll need to give the language a standard (lowercase ASCII) name,
+then modify SLOCCount to (1) detect and (2) count code in that language.
+
+<ol>
+<li>
+To detect a new language, you'll need to modify the program break_filelist.
+If the filename extension is reliable, you can modify the array
+%file_extensions, which maps various filename extensions into languages.
+If your needs are more complex, you'll need to modify the code
+(typically in functions get_file_type or file_type_from_contents)
+so that the correct file type is determined.
+For example, if a file with a given filename extension is only
+<i>sometimes</i> that type, you'll need to write code to examine the
+file contents.
+<li>
+You'll need to create a SLOC counter for that language type.
+It must have the name XYZ_count, where XYZ is the standard name for the
+language.
+<p>
+For some languages, you may be able to use the ``generic_count'' program
+to implement your counter - generic_count takes as its first argument
+the pattern which
+identifies comment begins (which continue until the end of the line);
+the other arguments are the files to count.
+Thus, the LISP counter looks like this:
+<pre>
+ #!/bin/sh
+ generic_count ';' $@
+</pre>
+The generic_count program won't work correctly if there are multiline comments
+(e.g., C) or multiline string constants.
+If your language is identical to C/C++ in terms of
+string constant definitions and commenting syntax
+(using // or /* .. */), then you can use the c_count program - in this case,
+modify compute_sloc_lang so that the c_count program is used.
+<p>
+Otherwise, you'll have to devise your own counting program.
+The program must produce output in the same format as the other counters:
+for every filename passed as an argument, it must print a separate line
+giving the SLOC for that file, a space, and the filename.
+(Note: the assembly language counter produces a slightly different format.)
+After all the files, it must print "Total:" on its own line, and the actual
+SLOC total on the following (last) line.
+A minimal sketch of such a counter appears just after this list.
+</ol>
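+<p>
+Here's a minimal sketch of such a counter, written as a Bourne shell script.
+It assumes a hypothetical language whose comments begin with "%%" and run to
+the end of the line; the name "newlang_count" and the comment marker are just
+placeholders for your own language.
+A real counter should also accept the "-f FILENAME" convention described
+earlier; that's omitted here for brevity:
+<pre>
+  #!/bin/sh
+  # newlang_count - illustrative counter sketch (not part of SLOCCount).
+  # A line counts as SLOC if it still contains a non-whitespace character
+  # after "%%"-to-end-of-line comments are stripped.
+  total=0
+  for f in "$@"
+  do
+    n=`sed -e 's/%%.*//' "$f" | grep -c '[^[:space:]]'`
+    echo "$n $f"
+    total=`expr $total + $n`
+  done
+  echo "Total:"
+  echo "$total"
+</pre>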
+
+<h1><a name="advanced">Advanced SLOCCount Use</a></h1>
+For most people, the previous information is enough.
+However, if you're measuring a large set of programs, or have unusual needs,
+those steps may not give you enough control.
+In that case, you may need to create your own "data directory"
+by hand and separately run the SLOCCount tools.
+Basically, "sloccount" (note the lower case) is the name for
+a high-level tool which invokes many other tools; this entire
+suite is named SLOCCount (note the mixed case).
+The next section will describe how to invoke the various tools "manually"
+so you can gain explicit control over the measuring process when
+the defaults are not to your liking, along with various suggestions
+for how to handle truly huge sets of data.
+<p>
+Here's how to manually create a "data directory" to hold
+intermediate results, and how to invoke each tool in sequence
+(with discussion of options):
+<ol>
+<li>Set your PATH to include the SLOCCount "bin directory", as discussed above.
+<li>Make an empty "data directory"
+(where all intermediate results will be stored);
+you can pick any name and location you like for this directory.
+Here, I'll use the name "data":
+<pre>
+ mkdir ~/data
+</pre>
+<li>Change your current directory to this "data directory":
+<pre>
+ cd ~/data
+</pre>
+The rest of these instructions assume that your current directory
+is the data directory.
+You can set up many different data directories if you wish, to analyze
+different source programs or analyze the programs in different ways;
+just "cd" to the one you want to work with.
+<li>(Optional) Some of the later steps will produce
+a lot of output while they're running.
+If you want to capture this information into a file, use the standard
+"script" command do to so.
+For example, "script run1" will save the output of everything you do into
+file "run1" (until you type control-D to stop saving the information).
+Don't forget that you're creating such a file, or it will become VERY large,
+and in particular don't type any passwords into such a session.
+You can store the script in the data directory, or create a subdirectory
+for such results - any data directory subdirectory that doesn't have the
+special file "filelist" is not a "data directory child" and is thus
+ignored by the later SLOCCount analysis routines.
+<li>Now initialize the "data directory".
+ In particular, initialization will create the "data directory children",
+ a set of subdirectories equivalent to the source code directory's
+ top directories. Each of these data directory children (subdirectories)
+ will contain a file named "filelist", which
+ lists all filenames in the corresponding source code directory.
+ These data directory children
+ will also eventually contain intermediate results
+ of analysis, which you can check for validity
+ (also, having a cache of these values speeds later analysis steps).
+ <p>
+ You use the "make_filelists" command to initialize a data directory.
+ For example, if your source code is in /usr/src/redhat/BUILD, run:
+<pre>
+ make_filelists /usr/src/redhat/BUILD/*
+</pre>
+<p>
+ Internally, make_filelists uses "find" to create the list of files, and
+ by default it ignores all symbolic links. However, you may need to
+ follow symbolic links; if you do, give make_filelists the
+ "--follow" option (which will use find's "-follow" option).
+ Here are make_filelists' options:
+<pre>
+ --follow Follow symbolic links
+ --datadir D Use this data directory
+ --skip S Skip basenames named S
+ --prefix P When creating children, prepend P to their name.
+ -- No more options
+</pre>
+<p>
+ Although you don't normally need to do so, if you want certain files to
+ not be counted at all in your analysis, you can remove
+ data directory children or edit the "filelist" files to do so.
+ There's no need to remove files which aren't source code files normally;
+ this is handled automatically by the next step.
+<p>
+ If you don't have a single source code directory where the subdirectories
+ represent the major components you want to count separately, you can
+ still use the tool but it's more work.
+ One solution is to create a "shadow" directory with the structure
+ you wish the program had, using symbolic links (you must use "--follow"
+ for this to work).
+ You can also just invoke make_filelists multiple times, with parameters
+ listing the various top-level directories you wish to include.
+ Note that the basenames of the directories must be unique.
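+  For example, to build such a shadow directory out of symbolic links
+  (the project names and paths here are purely illustrative):
+<pre>
+  mkdir ~/shadow
+  ln -s /some/path/projectA ~/shadow/projectA
+  ln -s /other/path/projectB ~/shadow/projectB
+  make_filelists --follow ~/shadow/*
+</pre>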
+<p>
+ If there are so many directories (e.g., a massive number of projects)
+ that the command line is too long,
+ you can run make_filelists multiple times in the same
+ directory with different arguments to create them.
+ You may find "find" and/or "xargs" helpful in doing this automatically.
+ For example, here's how to do the same thing using "find":
+<pre>
+ find /usr/src/redhat/BUILD -maxdepth 1 -mindepth 1 -type d \
+ -exec make_filelists {} \;
+</pre>
+<li>Categorize each file.
+This means that we must determine which
+files contain source code (eliminating auto-generated and duplicate files),
+and of those files which language each file contains.
+The result will be a set of files in each subdirectory of the data directory,
+where each file represents a category (e.g., a language).
+<pre>
+ break_filelist *
+</pre>
+ At this point you might want to examine the data directory subdirectories
+ to ensure that "break_filelist" has correctly determined the types of
+ the various files.
+ In particular, the "unknown" category may have source files in a language
+ SLOCCount doesn't know about.
+ If the heuristics got some categorization wrong, you can modify the
+ break_filelist program and re-run break_filelist.
+<p>
+ By default break_filelist removes duplicates, doesn't count
+ automatically generated files as normal source code files, and
+ only gives some feedback. You can change these defaults with the
+ following options:
+<pre>
+ --duplicates Count all duplicate files as normal files
+ --crossdups Count duplicate files if they're in different data directory
+ children (i.e., in different "filelists")
+ --autogen Count automatically generated files
+ --verbose Present more verbose status information while processing.
+</pre>
+<p>
+ Duplicate control in particular is an issue; you probably don't want
+ duplicates counted, so that's the default.
+ Duplicate files are detected by determining if their MD5 checksums
+ are identical; the "first" duplicate encountered is the only one kept.
+ Normally, since shells sort directory names, this means that the
+ file in the alphabetically first child directory is the one counted.
+ You can change this around by listing directories in the sort order you
+ wish followed by "*"; if the same data directory child
+ is requested for analysis more
+ than once in a given execution, it's skipped after the first time.
+ So, if you want any duplicate files with child directory "glibc" to
+ count as part of "glibc", then you should provide the data directory children
+ list as "glibc *".
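+  In other words:
+<pre>
+  break_filelist glibc *
+</pre>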
+<p>
+ Beware of choosing something other than "*" as the parameter here,
+ unless you use the "--duplicates" or "--crossdups" options.
+ The "*" represents the list of data directory children to examine.
+ Since break_filelist skips duplicate files identified
+ in a particular run, if you run break_filelist
+ on only certain children, some duplicate files won't be detected.
+ If you're allowing duplicates (via "--duplicates" or
+ "--crossdups"), then this isn't a problem.
+ Or, you can use the ``--duplistfile'' option to store and retrieve
+ hashes of files, so that additional files can be handled.
+<p>
+ If there are so many directories that the command line is too long,
+ you can run break_filelist multiple times and give it
+ a subset of the directories each time.
+ You'll need to use one of the duplicate control options to do this.
+ I would suggest using "--crossdups", which
+ means that duplicates inside a child will only be counted once,
+ eliminating at least some of the problems of duplicates.
+ Here's the equivalent of "break_filelist *" when there are a large
+ number of subdirectories:
+<pre>
+ find . -maxdepth 1 -mindepth 1 -type d -exec break_filelist --crossdups {} \;
+</pre>
+ Indeed, for all of the later commands where "*" is listed as the parameter
+ in these instructions
+ (for the list of data directory children), just run the above "find"
+ command and replace "break_filelist --crossdups" with the command shown.
+<li>(Optional)
+If you're not very familiar with the program you're analyzing, you
+might not be sure that "break_filelist" has correctly identified
+all of the files.
+In particular, the system might be using an unexpected
+programming language or extension not handled by SLOCCount.
+If this is your circumstance, you can just run the command:
+<pre>
+ count_unknown_ext
+</pre>
+(note that this command is unusual - it doesn't take any arguments,
+since it's hard to imagine a case where you wouldn't want every
+directory examined).
+Unlike the other commands discussed, this one specifically looks at
+${HOME}/.slocdata.
+This command presents a list of extensions which are unknown to break_filelist,
+with the most common ones listed first.
+The output format is a name, followed by the number of instances;
+the name begins with a "." if it's an extension, or, if there's no
+extension, it begins with "/" followed by the base name of the file.
+break_filelist already knows about common extensions such as ".gif" and ".png",
+as well as common filenames like "README".
+You can also view the contents of each of the data directory children's
+files to see if break_filelist has correctly categorized the files.
+<li>Now compute SLOC and filecounts for each language; you can compute for all
+ languages at once by calling:
+<pre>
+ compute_all *
+</pre>
+If you only want to compute SLOC for a specific language,
+you can invoke compute_sloc_lang, which takes as its first parameter
+the SLOCCount name of the language ("ansic" for C, "cpp" for C++,
+"ada" for Ada, "asm" for assembly), followed by the list
+of data directory children.
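+For example, to count just the C code in every data directory child:
+<pre>
+  compute_sloc_lang ansic *
+</pre>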
+Note that these names are a change from version 1.0, which
+called the master program "compute_all",
+and had "compute_*" programs for each language.
+<p>
+Notice the "*"; you can replace the "*" with just the list of
+data directory children (subdirectories) to compute, if you wish.
+Indeed, you'll notice that nearly all of the following commands take a
+list of data directory children as arguments; when you want all of them, use
+"*" (as shown in these instructions), otherwise, list the ones you want.
+<p>
+When you run compute_all or compute_sloc_lang, each data directory
+child (subdirectory)
+is consulted in turn for a list of the relevant files, and the
+SLOC results are placed in that data directory child.
+In each child,
+the file "LANGUAGE-outfile.dat" lists the information from the
+basic SLOC counters.
+That is, the outfile lists the SLOC and filename
+(the assembly outfile has additional information), and ends with
+a line saying "Total:" followed by a line showing the total SLOC of
+that language in that data directory child.
+The file "all-physical.sloc" has the final total SLOC for every language
+in that child directory (i.e., it's the last line of the outfile).
+<li>(Optional) If you want, you can also use USC's CodeCount.
+I've had trouble with these programs, so I don't do this normally.
+However, you're welcome to try - they support logical SLOC measures
+as well as physical ones (though not for most of the languages
+supported by SLOCCount).
+Sadly, they don't seem to compile with gcc without a lot of help, they
+use fixed-width buffers that make me nervous, and I found a
+number of bugs (e.g., it couldn't handle "/* text1 *//* text2 */" in
+C code, a format that's legal and used often in the Linux kernel).
+If you want to do this,
+modify the files compute_c_usc and compute_java_usc so they point to the
+right directories, and type:
+<pre>
+ compute_c_usc *
+</pre>
+<li>Now you can analyze the results. The main tool for
+presenting SLOCCount results is "get_sloc", e.g.:
+<pre>
+ get_sloc * | less
+</pre>
+The get_sloc program takes many options, including:
+<pre>
+ --filecount Display number of files instead of SLOC (SLOC is the default)
+ --wide Use "wide" format instead (tab-separated columns)
+ --nobreak Don't insert breaks in long lines
+ --sort X Sort by "X", where "X" is the name of a language
+ ("ansic", "cpp", "fortran", etc.), or "total".
+ By default, get_sloc sorts by "total".
+ --nosort Don't sort - just present results in order of directory
+ listing given.
+ --showother Show non-language totals (e.g., # duplicate files).
+ --oneprogram When computing effort, assume that all files are part of
+ a single program. By default, each subdirectory specified
+ is assumed to be a separate, independently-developed program.
+ --noheader Don't show the header
+ --nofooter Don't show the footer (the per-language values and totals)
+</pre>
+<p>
+Note that unlike the "sloccount" tool, get_sloc requires the current
+directory to be the data directory.
+<p>
+If you're displaying SLOC, get_sloc will also estimate the time it
+would take to develop the software using COCOMO (using its "basic" model).
+By default, this figure assumes that each of the major subdirectories was
+developed independently of the others;
+you can use "--oneprogram" to make the assumption that all files are
+part of the same program.
+The COCOMO model makes many other assumptions; see the paper at
+<a href="http://www.dwheeler.com/sloc">http://www.dwheeler.com/sloc</a>
+for more information.
+<p>
+If you need to do more analysis, you might want to use the "--wide"
+option and send the data to another tool such as a spreadsheet
+(e.g., gnumeric) or RDBMS (e.g., PostgreSQL).
+Using the "--wide" option creates tab-separated data, which is easier to
+import.
+You may also want to use the "--noheader" and/or "--nofooter" options to
+simplify porting the data to another tool.
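+For example, to produce tab-separated data with no header or footer, ready
+for import into another tool (the output filename is arbitrary):
+<pre>
+  get_sloc --wide --noheader --nofooter * &gt; sloc_data.txt
+</pre>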
+<p>
+Note that in version 1.0, "get_sloc" was called "get_data".
+<p>
+If you have so many data directory children that you can't use "*"
+on the command line, get_sloc won't be as helpful.
+Feel free to patch get_sloc to add this capability (as another option),
+or use get_sloc_detail (discussed next) to feed the data into another tool.
+<li>(Optional) If you just can't get the information you need from get_sloc,
+then you can get the raw results of everything and process the data
+yourself.
+I have a little tool to do this, called get_sloc_details.
+You invoke it in a similar manner:
+<pre>
+get_sloc_details *
+</pre>
+</ol>
+
+<p>
+<h1><a name="designer-notes">Designer's Notes</a></h1>
+<p>
+Here are some ``designer's notes'' on how SLOCCount works,
+including what it can handle.
+<p>
+The program break_filelist
+has categories for each programming language it knows about,
+plus the special categories ``not'' (not a source code file),
+``auto'' (an automatically-generated file and thus not to be counted),
+``zero'' (a zero-length file),
+``dup'' (a duplicate of another file as determined by an md5 checksum),
+and
+``unknown'' (a file which doesn't seem to be a source code file
+nor any of these other categories).
+It's a good idea to examine
+the ``unknown'' items later, checking the common extensions
+to ensure you have not missed any common types of code.
+<p>
+The program break_filelist uses lots of heuristics to correctly
+categorize files.
+Here are a few notes about its heuristics:
+<ol>
+<li>
+break_filelist first checks for well-known extensions (such as .gif) that
+cannot be program files, and for a number of common generated filenames.
+<li>
+It then peeks at the first few lines for "#!" followed by a legal script
+name.
+Sometimes it looks further, for example, many Python programs
+invoke "env" and then use it to invoke python.
+<li>
+If that doesn't work, it uses the extension to try to determine the category.
+For a number of languages, the extension is not reliable, so for those
+languages it examines the file contents and uses a set of heuristics
+to determine if the file actually belongs to that category.
+<li>
+Detecting automatically generated files is not easy, and it's
+quite conceivable that it won't detect some automatically generated files.
+The first 15 lines are examined, to determine if any of them
+include at the beginning of the line (after spaces and
+possible comment markers) one of the following phrases (ignoring
+upper and lower case distinctions):
+``generated automatically'',
+``automatically generated'',
+``this is a generated file'',
+``generated with the (something) utility'',
+or ``do not edit''.
+<li>A number of filename conventions are used, too.
+For example,
+any ``configure'' file is presumed to be automatically generated if
+there's a ``configure.in'' file in the same directory.
+<li>
+To eliminate duplicates,
+the program keeps md5 checksums of each program file.
+Any given md5 checksum is only counted once.
+Build directories are processed alphabetically, so
+if the same file content is in both directories ``a'' and ``b'',
+it will be counted only once as being part of ``a'' unless you make
+other arrangements.
+Thus, some data directory children with names later in the alphabet may appear
+smaller than would make sense at first glance.
+It is very difficult to eliminate ``almost identical'' files
+(e.g., an older and newer version of the same code, included in two
+separate packages), because
+it is difficult to determine when two ``similar'' files are essentially
+the same file.
+Changes such as the use of pretty-printers and massive renaming of variables
+could make small changes seem large, while the small files
+might easily appear to be the ``same''.
+Thus, files with different contents are simply considered different.
+<li>
+If all else fails, the file is placed in the ``unknown'' category for
+later analysis.
+</ol>
+<p>
+One complicating factor is that I wished to separate C, C++, and
+Objective-C code, but a header file ending with
+``.h'' or ``.hpp'' could be any of these languages.
+In theory, ``.hpp'' is only C++, but I found that in practice this isn't true.
+I developed a number of heuristics to determine, for each file,
+what language a given header belonged to.
+For example, if a given directory has exactly one of these languages
+(ignoring header files),
+the header is assumed to belong to that category as well.
+Similarly, if there is a body file (e.g., ".c") that has the same name
+as the header file, then presumably the header file is of the same language.
+Finally, a header file with the keyword ``class'' is almost certainly not a
+C header file, but a C++ header file; otherwise it's assumed to
+be a C file.
+<p>
+None of the SLOC counters fully parse the source code; they just examine
+the code using simple text processing patterns to count the SLOC.
+In practice, by handling a number of special cases this seems to be fine.
+Here are some notes on some of the language counters;
+the language name is followed by common extensions in parentheses
+and the SLOCCount name of the language in brackets:
+<ol>
+<li>Ada (.ada, .ads, .adb) [ada]: Comments begin with "--".
+<li>Assembly (.s, .S, .asm) [asm]:
+Assembly languages vary greatly in the comment character they use,
+so my counter had to handle this variance.
+The assembly language counter (asm_count)
+first examines the file to determine if
+C-style ``/*'' comments and C preprocessor commands
+(e.g., ``#include'') are used.
+If both ``/*'' and ``*/'' are in the file, it's assumed that
+C-style comments are being used
+(since it is unlikely that <i>both</i> would be used
+as something else, say as string data, in the same assembly language file).
+Determining if a file used the C preprocessor was trickier, since
+many assembly files do use ``#'' as a comment character and some
+preprocessor directives are ordinary words that might be included
+in a human comment.
+The heuristic used is as follows: if #ifdef, #endif, or #include are used, the
+C preprocessor is used; or if at least three lines have either #define or #else,
+then the C preprocessor is used.
+No doubt other heuristics are possible, but this at least seems to produce
+reasonable results.
+The program then determines what the comment character is by identifying
+which punctuation mark (from a set of possible marks)
+is the most common non-space initial character on a line
+(ignoring ``/'' and ``#'' if C comments or preprocessor commands,
+respectively, are used).
+Once the comment character has been determined, and it's been determined
+if C-style comments are allowed, the lines of code
+are counted in the file.
+<li>awk (.awk) [awk]: Comments begin with "#".
+<li>C (.c) [ansic]: Both traditional C comments (/* .. */) and C++
+(//) comments are supported.
+Technically, C doesn't support "//", but in practice many C programs use them.
+The C counter understands multi-line strings, so
+comment characters (/* .. */ and //) are treated as data inside strings.
+Conversely, the counter knows that a double-quote character inside a
+comment does not begin a C/C++ string.
+<li>C++ (.C, .cpp, .cxx, .cc) [cpp]: The same counter is used for
+both C and C++.
+Note that break_filelist does try to separate C from C++ for purposes
+of accounting between them.
+<li>C# (.cs): The same counter is used as for C and C++.
+Note that there are no "header" filetypes in C#.
+<li>C shell (.csh) [csh]: Comments begin with "#".
+<li>COBOL (.cob, .cbl) [cobol]: SLOCCount
+detects if a "freeform" command has been given; until such a command is
+given, fixed format is assumed.
+In fixed format, comments have a "*" or "/" in column 7 or column 1;
+any line that's not a comment, and has a nonwhitespace character after column 7
+(the indicator area) is counted as a source line of code.
+In a freeform style, any line beginning with optional whitespace and
+then "*" or "/" is considered a comment; any noncomment line
+with a nonwhitespace character is counted as SLOC.
+<li>Expect (.exp) [exp]: Comments begin with "#".
+<li>Fortran (.f) [fortran]: Comment-only lines are lines
+where column 1 character = C, c, *, or !.
+Note that this is really only a Fortran-77 SLOC counter.
+<li>Haskell (.hs) [haskell]:
+This counter handles block comments {- .. -} and single line comments (--);
+pragmas ({-# .. #-}) are counted as SLOC.
+This is a simplistic counter,
+and can be fooled by certain unlikely combinations of block comments
+and other syntax (line-ending comments or strings).
+In particular, "Hello {-" will be incorrectly interpreted as a
+comment block begin, and "{- -- -}" will be incorrectly interpreted as a
+comment block begin without an end.
+Note that .lhs (literate Haskell) is <i>not</i> supported; please
+preprocess .lhs files into .hs files before counting.
+See the
+<a href="http://www.haskell.org/onlinereport/literate.html">Haskell 98
+report section on literate Haskell</a> for more information.
+<li>Java (.java) [java]: Java is counted using the same counter as C and C++.
+<li>lex (.l) [lex]: Uses traditional C /* .. */ comments.
+Note that this does not use the same counter as C/C++ internally, since
+it's quite legal in lex to have "//" (where it is NOT a comment).
+<li>LISP (.el, .scm, .lsp, .jl) [lisp]: Comments begin with ";".
+<li>ML (.ml, .mli) [ml]: Comments are enclosed in (* .. *).
+<li>Modula3 (.m3, .i3) [modula3]: Comments are enclosed in (* .. *).
+<li>Objective-C (.m) [objc]: Comments are old C-style /* .. */ comments.
+<li>Pascal (.p, .pas) [pascal]: Comments are enclosed in curly braces {}
+or (*..*). This counter has known weaknesses; see the BUGS section of
+the manual page for more information.
+<li>Perl (.pl, .pm, .perl) [perl]:
+Comments begin with "#".
+Perl permits in-line ``perlpod'' documents, ``here'' documents, and an
+__END__ marker that complicate code-counting.
+Perlpod documents are essentially comments, but a ``here'' document
+may include text to generate them (in which case the perlpod document
+is data and should be counted).
+The __END__ marker indicates the end of the file from Perl's
+viewpoint, even if there's more text afterwards.
+<li>PHP (.php, .php[3456], .inc) [php]:
+Code is counted as PHP code if it has a .php file extension;
+it's also counted if it has an .inc extension and looks like PHP code.
+SLOCCount does <b>not</b> count PHP code embedded in HTML files normally,
+though its lower-level routines can do so if you want to
+(use php_count to do this).
+Any of the various ways to begin PHP code can be used
+(&lt;? .. ?&gt;,
+&lt;?php .. ?&gt;,
+&lt;script language="php"&gt; .. &lt;/script&gt;,
+or even &lt;% .. %&gt;).
+Any of the PHP comment formats (C, C++, and shell) can be used, and
+any string constant formats ("here document", double quote, and single
+quote) can be used as well.
+<li>Python (.py) [python]:
+Comments begin with "#".
+Python has a convention that, at the beginning of a definition
+(e.g., of a function, method, or class), an unassigned string can be
+placed to describe what's being defined. Since this is essentially
+a comment (though it doesn't syntactically look like one), the counter
+avoids counting such strings, which may have multiple lines.
+To handle this,
+strings which start at the beginning of a line are not counted.
+Python also has the ``triple quote'' operator, permitting multiline
+strings; these need to be handled specially.
+Triple-quoted strings are normally considered data, regardless of
+content, unless they are used as a comment about a definition.
+<li>Ruby (.rb) [ruby]: Comments begin with "#".
+<li>sed (.sed) [sed]: Comments begin with "#".
+Note that these are "sed-only" files; many uses of sed are embedded in
+shell scripts (and are categorized as shell scripts in those cases).
+<li>shell (.sh) [sh]: Comments begin with "#".
+Note that I classify ksh, bash, and the original Bourne shell sh together,
+because they have very similar syntaxes.
+For example, in all of these shells,
+setting a variable is expressed as "varname=value",
+while C shells use "set varname=value".
+<li>TCL (.tcl, .tk, .itk) [tcl]: Comments begin with "#".
+<li>Yacc (.y) [yacc]: Yacc is counted using the same counter as C and C++.
+</ol>
+<p>
+Much of the code is written in Perl, since it's primarily a text processing
+problem and Perl is good at that.
+Many short scripts are Bourne shell scripts (it's good at
+short scripts for calling other programs), and the
+basic C/C++ SLOC counter is written in C for speed.
+<p>
+I originally named it "SLOC-Count", but I found that some web search
+engines (notably Google) treated that as two words.
+By naming it "SLOCCount", it's easier to find by those who know
+the name of the program.
+<p>
+SLOCCount only counts physical SLOC, not logical SLOC.
+Logical SLOC counting requires much more code to implement,
+and I needed to cover a large number of programming languages.
+
+
+<p>
+<h1><a name="sloc-definition">Definition of SLOC</a></h1>
+<p>
+This tool measures ``physical SLOC.''
+Physical SLOC is defined as follows:
+``a physical source line of code (SLOC) is a line ending
+in a newline or end-of-file marker,
+and which contains at least one non-whitespace non-comment character.''
+Comment delimiters (characters other than newlines starting and ending
+a comment) are considered comment characters.
+Data lines only including whitespace
+(e.g., lines with only tabs and spaces in multiline strings) are not included.
+<p>
+To make this concrete, here's an example of a simple C program
+(it strips ANSI C comments out).
+On the left side is the running SLOC total, where "-" indicates a line
+that is not considered a physical "source line of code":
+<pre>
+ 1 #include &lt;stdio.h&gt;
+ -
+ - /* peek at the next character in stdin, but don't get it */
+ 2 int peek() {
+ 3 int c = getchar();
+ 4 ungetc(c, stdin);
+ 5 return c;
+ 6 }
+ -
+ 7 main() {
+ 8 int c;
+ 9 int incomment = 0; /* 1 = we are inside a comment */
+ -
+10 while ( (c = getchar()) != EOF) {
+11 if (!incomment) {
+12 if ((c == '/') &amp;&amp; (peek() == '*')) {incomment=1;}
+13 } else {
+14 if ((c == '*') &amp;&amp; (peek() == '/')) {
+15 c= getchar(); c=getchar(); incomment=0;
+16 }
+17 }
+18 if ((c != EOF) &amp;&amp; !incomment) {putchar(c);}
+19 }
+20 }
+</pre>
+<p>
+<a href="http://www.sei.cmu.edu/publications/documents/92.reports/92.tr.020.html">Robert E. Park et al.'s
+<i>Software Size Measurement:
+A Framework for Counting Source Statements</i></a>
+(Technical Report CMU/SEI-92-TR-20)
+presents a set of issues to be decided when trying to count code.
+The paper's abstract states:
+<blockquote><i>
+This report presents guidelines for defining, recording, and reporting
+two frequently used measures of software size -- physical source lines
+and logical source statements.
+We propose a general framework for constructing size
+definitions and use it to derive operational methods for
+reducing misunderstandings in measurement results.
+</i></blockquote>
+<p>
+Using Park's framework, here is how physical lines of code are counted:
+<ol>
+<li>Statement Type: I used a physical line-of-code as my basis.
+I included executable statements, declarations
+(e.g., data structure definitions), and compiler directives
+(e.g., preprocessor commands such as #define).
+I excluded all comments and blank lines.
+<li>How Produced:
+I included all programmed code, including any files that had been modified.
+I excluded code generated with source code generators, converted with
+automatic translators, and those copied or reused without change.
+If a file was in the source package, I included it; if the file had
+been removed from a source package (including via a patch), I did
+not include it.
+<li>Origin: You select the files (and thus their origin).
+<li>Usage: You select the files (and thus their usage), e.g.,
+you decide if you're going to
+include additional applications able to run on the system but not
+included with the system.
+<li>Delivery: You'll decide what code to include, but of course,
+if you don't have the code you can't count it.
+<li>Functionality: This tool will include both operative and inoperative code
+if they're mixed together.
+An example of intentionally ``inoperative'' code is
+code turned off by #ifdef commands; since it could be
+turned on for special purposes, it made sense to count it.
+An example of unintentionally ``inoperative'' code is dead or unused code.
+<li>Replications:
+Normally, duplicate files are ignored, unless you use
+the "--duplicates" or "--crossdups" option.
+The tool will count
+``physical replicates of master statements stored in
+the master code''.
+This is simply code cut and pasted from one place to another to reuse code;
+it's hard to tell where this happens, and since it has to be maintained
+separately, it's fair to include this in the measure.
+I excluded copies inserted, instantiated, or expanded when compiling
+or linking, and I excluded postproduction replicates
+(e.g., reparameterized systems).
+<li>Development Status: You'll decide what code
+should be included (and thus the development status of the code that
+you'll accept).
+<li>Languages: You can see the language list above.
+<li>Clarifications: I included all statement types.
+This included nulls, continues, no-ops, lone semicolons,
+statements that instantiate generics,
+lone curly braces ({ and }), and labels by themselves.
+</ol>
+<p>
+Thus, SLOCCount generally follows Park's ``basic definition'',
+but with the following exceptions depending on how you use it:
+<ol>
+<li>How Produced:
+By default, this tool excludes duplicate files and
+code generated with source code generators.
+After all, the COCOMO model states that the
+only code that should be counted is code
+``produced by project personnel'', whereas these kinds of files are
+instead the output of ``preprocessors and compilers.''
+If code is always maintained as the input to a code generator, and then
+the code generator is re-run, it's only the code generator input's size that
+validly measures the size of what is maintained.
+Note that while I attempted to exclude generated code, this exclusion
+is based on heuristics which may have missed some cases.
+If you want to count duplicates, use the
+"--duplicates" and/or "--crossdups" options.
+If you want to count automatically generated files, pass
+the "--autogen" option mentioned above.
+<li>Origin:
+You can choose what source code you'll measure.
+Normally physical SLOC doesn't include an unmodified
+``vendor-supplied language support library'' nor a
+``vendor-supplied system or utility''.
+However, if this is what you are measuring, then you need to include it.
+If you include such code, your set will be different
+than the usual ``basic definition.''
+<li>Functionality: I included counts of unintentionally inoperative code
+(e.g., dead or unused code).
+It is very difficult to automatically detect such code
+in general for many languages.
+For example, a program not directly invoked by anything else nor
+installed by the installer is much more likely to be a test program,
+which you may want to include in the count (you often would include it
+if you're estimating effort).
+Clearly, discerning human ``intent'' is hard to automate.
+</ol>
+<p>
+Otherwise, this counter follows Park's
+``basic definition'' of a physical line of code, even down to Park's
+language-specific definitions where Park defined them for a language.
+
+
+<p>
+<h1><a name="miscellaneous">Miscellaneous Notes</a></h1>
+<p>
+There are other undocumented analysis tools in the original tar file.
+Most of them are specialized scripts for my circumstances, but feel
+free to use them as you wish.
+<p>
+If you're packaging this program, don't just copy every executable
+into the system "bin" directory - many of the files are those
+specialized scripts.
+Just put in the bin directory every executable documented here, plus the
+files they depend on (there aren't that many).
+See the RPM specification file to see what's actually installed.
+<p>
+You have to take any measure of SLOC (including this one) with a
+large grain of salt.
+Physical SLOC is sensitive to the format of source code.
+There's a correlation between SLOC and development effort, and some
+correlation between SLOC and functionality,
+but there's absolutely no correlation between SLOC
+and either "quality" or "value".
+<p>
+A problem of physical SLOC is that it's sensitive to formatting,
+and that's a legitimate (and known) problem with the measure.
+However, to be fair, logical SLOC is influenced by coding style too.
+For example, the following two phrases are semantically identical,
+but will have different logical SLOC values:
+<pre>
+ int i, j; /* 1 logical SLOC */
+
+ int i; /* 2 logical SLOC, but it does the same thing */
+ int j;
+</pre>
+<p>
+If you discover other information that can be divided up by
+data directory children (e.g., the license used), it's probably best
+to add that to each subdirectory (e.g., as a "license" file in the
+subdirectory).
+Then you can modify tools like get_sloc
+to add them to their display.
+<p>
+I developed SLOCCount for my own use, not originally as
+a community tool, so it's certainly not beautiful code.
+However, I think it's serviceable - I hope you find it useful.
+Please send me patches for any improvements you make!
+<p>
+You can't use this tool as-is with some estimation models, such as COCOMO II,
+because this tool doesn't compute logical SLOC.
+I certainly would accept code contributions to add the ability to
+measure logical SLOC (or related measures such as
+Cyclomatic Complexity and Cyclomatic density);
+selecting them could be a compile-time option.
+However, measuring logical SLOC takes more development effort, so I
+haven't done so; see USC's "CodeCount" for a set of code that
+measures logical SLOC for some languages
+(though I've had trouble with CodeCount - in particular, its C counter
+doesn't correctly handle large programs like the Linux kernel).
+
+
+<p>
+<h1><a name="license">SLOCCount License</a></h1>
+<p>
+Here is the SLOCCount License; the file COPYING contains the standard
+GPL version 2 license:
+<pre>
+=====================================================================
+SLOCCount
+Copyright (C) 2000-2001 David A. Wheeler (dwheeler, at, dwheeler.com)
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=====================================================================
+</pre>
+<p>
+While it's not formally required by the license, please give credit
+to me and this software in any report that uses results generated by it.
+<p>
+This document was written by David A. Wheeler (dwheeler, at, dwheeler.com),
+and is
+(C) Copyright 2001 David A. Wheeler.
+This document is covered by the license (GPL) listed above.
+<p>
+The license <i>does</i> give you the right to
+use SLOCCount to analyze proprietary programs.
+
+<p>
+<h1><a name="related-tools">Related Tools</a></h1>
+<p>
+One available toolset is
+<a href="http://sunset.usc.edu/research/CODECOUNT">CodeCount</a>.
+I tried using this toolset, but I eventually gave up.
+It had too many problems handling the code I was trying to analyze, and it
+does a poor job automatically categorizing code.
+It also has no support for many of today's languages (such as Python,
+Perl, Ruby, PHP, and so on).
+However, it does a lot of analysis and measurements that SLOCCount
+doesn't do, so it all depends on your need.
+Its license appeared to be open source, but it's quite unusual and
+I'm not enough of a lawyer to be able to confirm that.
+<p>
+Another tool that's available is <a href="http://csdl.ics.hawaii.edu/Research/LOCC/LOCC.html">LOCC</a>.
+It's available under the GPL.
+It can count Java code, and there's experimental support for C++.
+LOCC is really intended for more deeply analyzing each Java file;
+what's particularly interesting about it is that it can measure
+"diffs" (how much has changed).
+See
+<a href="http://csdl.ics.hawaii.edu/Publications/MasterList.html#csdl2-00-10">
+A comparative review of LOCC and CodeCount</a>.
+<p>
+<a href="http://sourceforge.net/projects/cccc">
+CCCC</a> is a tool which analyzes C++ and Java files
+and generates a report on various metrics of the code.
+Metrics supported include lines of code, McCabe's complexity,
+and metrics proposed by Chidamber &amp; Kemerer and Henry &amp; Kafura.
+(You can see
+<a href="http://cccc.sourceforge.net/">Time Littlefair's comments</a>).
+CCCC is in the public domain.
+It reports on metrics that sloccount doesn't, but sloccount can handle
+far more computer languages.
+
+<p>
+<h1><a name="submitting-changes">Submitting Changes</a></h1>
+<p>
+The GPL license doesn't require you to submit changes you make back to
+its maintainer (currently me),
+but it's highly recommended and wise to do so.
+Because others <i>will</i> send changes to me, a version you make on your
+own will slowly become obsolete and incompatible.
+Rather than allowing this to happen, it's better to send changes in to me
+so that the latest version of SLOCCount also has the
+features you're looking for.
+If you're submitting support for new languages, be sure that your
+change correctly ignores files that aren't in that new language
+(some filename extensions have multiple meanings).
+You might want to look at the <a href="TODO">TODO</a> file first.
+<p>
+When you send changes to me, send them as "diff" results so that I can
+use the "patch" program to install them.
+If you can, please send ``unified diffs'' -- GNU's diff can create these
+using the "-u" option.
+</body>
+
diff --git a/sloccount.spec b/sloccount.spec
new file mode 100644
index 0000000..62dd7b4
--- /dev/null
+++ b/sloccount.spec
@@ -0,0 +1,56 @@
+#
+# RPM spec file for "sloccount".
+#
+%define PKG_VERSION 2.26
+
+Name: sloccount
+Summary: Measures source lines of code (SLOC) in programs
+Version: %{PKG_VERSION}
+Release: 1
+Copyright: GPL
+Group: Development/Tools
+Source: http://www.dwheeler.com/sloccount/sloccount-%{PKG_VERSION}.tar.gz
+URL: http://www.dwheeler.com/sloccount
+Vendor: David A. Wheeler
+Packager: David A. Wheeler <dwheeler@dwheeler.com>
+Prefix: /usr
+BuildRoot: /var/tmp/%name-buildroot
+
+%description
+SLOCCount (pronounced "sloc-count") is a suite of programs for counting
+physical source lines of code (SLOC) in potentially large software systems
+(thus, SLOCCount is a "software metrics tool" or "software measurement tool").
+SLOCCount can count physical SLOC for a wide number of languages;
+listed alphabetically, they are: Ada, Assembly, awk, Bourne shell, C, C++,
+C shell, COBOL, Expect, Fortran, Java, lex/flex, LISP (including Scheme),
+Modula-3, Objective-C, Pascal, Perl, PHP, Python, sed, TCL, and Yacc.
+SLOCCount can automatically determine if a file
+is a source code file or not, and if so, which language it's written in.
+As a result, you can analyze large systems completely automatically;
+it's been used to examine entire GNU/Linux distributions, for example.
+SLOCCount also includes some report-generating tools
+to collect the data generated and present it in several different formats.
+Normally you can just run "sloccount DIRECTORY" and all the source code
+in the directory and its descendants will be counted.
+
+%prep
+%setup
+
+%build
+make
+
+%install
+rm -rf ${RPM_BUILD_ROOT}
+mkdir -p ${RPM_BUILD_ROOT}%{_bindir}
+mkdir -p ${RPM_BUILD_ROOT}%{_mandir}/man1
+make install_programs PREFIX=${RPM_BUILD_ROOT}%{_prefix}
+make install_man PREFIX=${RPM_BUILD_ROOT}%{_prefix}
+
+%clean
+rm -rf ${RPM_BUILD_ROOT}
+
+%files
+%defattr(-, root, root)
+%doc sloccount.html README ChangeLog COPYING TODO
+%{_bindir}/*
+%{_mandir}/*/*
diff --git a/sql_count b/sql_count
new file mode 100755
index 0000000..8240fd9
--- /dev/null
+++ b/sql_count
@@ -0,0 +1,76 @@
+#!/usr/bin/perl
+# sql_count - count physical lines of code in SQL.
+
+# SQL is really screwed up in its commenting system.
+# In ANSI, "--" means start of comment, but this causes many problems
+# with automatically generated SQL queries. For example, given:
+# UPDATE tbl_name SET credit=credit-!payment!
+# If !payment! is automatically substituted for a negative number,
+# a comment is unexpectedly generated.
+
+# So, this program accepts "-- " (dash-dash-space) as a comment character.
+# It also supports "#" and /* .. */, which are supported by MySQL.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+ # Yes, we have -f
+ if ($ARGV[1] eq "-") {
+ # The list of files is in STDIN
+ while (<STDIN>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ } else {
+ # The list of files is in the file $ARGV[1]
+    open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+ while (<FILEWITHLIST>) {
+ chomp ($_);
+ &count_file ($_);
+ }
+ close FILEWITHLIST;
+ }
+ shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+ &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+ my ($file) = @_;
+ my $sloc = 0;
+
+ $result = `sed -e "s/#.*//" -e "s/-- .*//" < "$file" | c_count`;
+ $result =~ m/^\s*([0-9]+)/;
+ $sloc = $1;
+ print "$sloc $file\n";
+ $total_sloc += $sloc;
+}
diff --git a/stripccomments.c b/stripccomments.c
new file mode 100644
index 0000000..187659c
--- /dev/null
+++ b/stripccomments.c
@@ -0,0 +1,50 @@
+/*
+stripcomments - a simple program to remove C comments.
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+
+*/
+
+#include <stdio.h>
+
+int peek() {
+ int c = getchar();
+ ungetc(c, stdin);
+ return c;
+}
+
+main() {
+ int c;
+ int incomment = 0;
+
+ while ( (c = getchar()) != EOF) {
+ if (!incomment) {
+ if ((c == '/') && (peek() == '*')) {incomment=1;}
+ } else {
+ if ((c == '*') && (peek() == '/')) {
+ c= getchar(); c=getchar(); incomment=0;
+ }
+ }
+ if ((c != EOF) && !incomment) {putchar(c);}
+ }
+}
+
diff --git a/stub b/stub
new file mode 100644
index 0000000..40cfabc
--- /dev/null
+++ b/stub
@@ -0,0 +1,22 @@
+
+This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC).
+Copyright (C) 2001-2004 David A. Wheeler.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+To contact David A. Wheeler, see his website at:
+ http://www.dwheeler.com.
+
+
diff --git a/stubsh b/stubsh
new file mode 100644
index 0000000..74456aa
--- /dev/null
+++ b/stubsh
@@ -0,0 +1,23 @@
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
diff --git a/table.html b/table.html
new file mode 100644
index 0000000..81474a3
--- /dev/null
+++ b/table.html
@@ -0,0 +1,569 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"><html><head><title>table</title><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"></head><body><br>
+By default SLOCCount uses a very simple estimating model for effort and schedule:
+the basic COCOMO model in the "organic" mode (see below for more about this).
+&nbsp;This model estimates effort and schedule, including design, code, test,
+and documentation time (both user/admin documentation and development documentation).
+Basic COCOMO is a nice simple model, and it's used as the default because
+it doesn't require any information about the code other than the SLOC count
+already computed.<br>
+<br>
+However, basic COCOMO's accuracy is limited in part for the same reason -
+basic COCOMO doesn't take a number of important factors into account.<br>
+If you have the necessary information, you can improve the model's accuracy
+by taking those factors into account. &nbsp;At a minimum, you can quickly check
+that the right "mode" is being used. &nbsp;You can also
+use the "Intermediate COCOMO" and "Detailed COCOMO" models, which take more
+factors into account and are likely to produce more accurate estimates as
+a result. &nbsp;Take these estimates as just that - estimates - they're not grand
+truths.<br>
+<br>
+To use the COCOMO model, you first need to determine your application's
+mode, which can be "organic", "semidetached", or "embedded". &nbsp;Most software
+is "organic" (which is why it's the default). &nbsp;Here are simple definitions
+of these modes:<br>
+<ul>
+<li>Organic: Relatively small software teams develop software in a highly
+familiar, in-house environment. &nbsp;It has a generally stable development
+environment, minimal need for innovative algorithms, and requirements can
+be relaxed to avoid extensive rework.</li><li>Semidetached: This is an intermediate
+step between organic and embedded. This is generally characterized by reduced
+flexibility in the requirements.</li><li>Embedded: The project must operate
+within tight (hard-to-meet) constraints, and requirements and interface specifications
+are often non-negotiable. &nbsp;The software will be embedded in a complex environment that the software must deal with as-is.<br>
+ </li>
+</ul>
+By default, SLOCCount uses the basic COCOMO model in the organic mode. &nbsp;For
+the basic COCOMO model, here are the critical factors for --effort and --schedule:<br>
+<ul>
+<li>Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li><li>Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li><li>Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32<br>
+ </li>
+</ul>
+Thus, if you want to use sloccount but the project is actually semidetached,
+you can use "--effort 3.0 1.12 --schedule 2.5 0.35" as options to sloccount
+to get a more accurate estimate.<br>
+<br>
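+As a rough worked example, the factor and exponent plug into the standard
+basic COCOMO equations: effort (in person-months) is factor * (KSLOC)^exponent,
+and schedule (in months) is factor * (effort)^exponent. &nbsp;For a hypothetical
+10,000 SLOC (10 KSLOC) semidetached project, that works out to roughly:<br>
+<pre>effort   = 3.0 * 10^1.12   = roughly 39.5 person-months
+schedule = 2.5 * 39.5^0.35 = roughly 9 months</pre>
+<br>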
+For more accurate estimates, you can use the intermediate COCOMO models.
+&nbsp;For intermediate COCOMO, use the following figures:<br>
+<ul>
+ <li>Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38</li>
+ <li>Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35</li>
+ <li>Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32<br>
+ </li>
+</ul>
+The intermediate COCOMO values for schedule are exactly the same as in the basic
+COCOMO model; the starting effort values are not quite the same, as discussed
+in Boehm's book. &nbsp;However, in the intermediate COCOMO model, you don't
+normally use the effort factors as-is; instead, you apply various corrective factors
+(called cost drivers). &nbsp;To use these corrections, you rate each cost driver,
+multiply all the resulting multipliers together, and then multiply that product by the
+effort base factor to determine the final effort factor. &nbsp;Here are the cost drivers
+(from Boehm's book, tables 8-2 and 8-3):<br>
+<br>
+<table cellpadding="2" cellspacing="2" border="1" width="100%">
+
+
+ <tbody>
+ <tr>
+ <td valign="top" rowspan="1" colspan="2">Cost Drivers<br>
+ </td>
+ <td valign="top" rowspan="1" colspan="6">Ratings<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">ID<br>
+ </td>
+ <td valign="top">Driver Name<br>
+ </td>
+ <td valign="top">Very Low<br>
+ </td>
+ <td valign="top">Low<br>
+ </td>
+ <td valign="top">Nominal<br>
+ </td>
+ <td valign="top">High<br>
+ </td>
+ <td valign="top">Very High<br>
+ </td>
+ <td valign="top">Extra High<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">RELY<br>
+ </td>
+ <td valign="top">Required software reliability<br>
+ </td>
+ <td valign="top">0.75 (effect is slight inconvenience)<br>
+ </td>
+ <td valign="top">0.88 (easily recovered losses)<br>
+ </td>
+ <td valign="top">1.00 (recoverable losses)<br>
+ </td>
+ <td valign="top">1.15 (high financial loss)<br>
+ </td>
+ <td valign="top">1.40 (risk to human life)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">DATA<br>
+ </td>
+ <td valign="top">Database size<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top">0.94 (database bytes/SLOC &lt; 10)<br>
+ </td>
+ <td valign="top">1.00 (D/S between 10 and 100)<br>
+ </td>
+ <td valign="top">1.08 (D/S between 100 and 1000)<br>
+ </td>
+ <td valign="top">1.16 (D/S &gt; 1000)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">CPLX<br>
+ </td>
+ <td valign="top">Product complexity<br>
+ </td>
+ <td valign="top">0.70 (mostly straightline code, simple arrays, simple expressions)<br>
+ </td>
+ <td valign="top">0.85<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ <td valign="top">1.15<br>
+ </td>
+ <td valign="top">1.30<br>
+ </td>
+ <td valign="top">1.65 (microcode, multiple resource scheduling, device timing dependent coding)<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">TIME<br>
+ </td>
+ <td valign="top">Execution time constraint<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top">1.00 (&lt;50% use of available execution time)<br>
+ </td>
+ <td valign="top">1.11 (70% use)<br>
+ </td>
+ <td valign="top">1.30 (85% use)<br>
+ </td>
+ <td valign="top">1.66 (95% use)<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">STOR<br>
+ </td>
+ <td valign="top">Main storage constraint<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top">1.00&nbsp;(&lt;50% use of available storage)</td>
+ <td valign="top">1.06 (70% use)<br>
+ </td>
+ <td valign="top">1.21 (85% use)<br>
+ </td>
+ <td valign="top">1.56 (95% use)<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">VIRT<br>
+ </td>
+ <td valign="top">Virtual machine (HW and OS) volatility<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top">0.87 (major change every 12 months, minor every month)<br>
+ </td>
+ <td valign="top">1.00 (major change every 6 months, minor every 2 weeks)</td>
+ <td valign="top">1.15 (major change every 2 months, minor changes every week)<br>
+ </td>
+ <td valign="top">1.30 (major changes every 2 weeks, minor changes every 2 days)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">TURN<br>
+ </td>
+ <td valign="top">Computer turnaround time<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top">0.87 (interactive)<br>
+ </td>
+ <td valign="top">1.00 (average turnaround &lt; 4 hours)<br>
+ </td>
+ <td valign="top">1.07<br>
+ </td>
+ <td valign="top">1.15<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">ACAP<br>
+ </td>
+ <td valign="top">Analyst capability<br>
+ </td>
+ <td valign="top">1.46 (15th percentile)<br>
+ </td>
+ <td valign="top">1.19 (35th percentile)<br>
+ </td>
+ <td valign="top">1.00 (55th percentile)<br>
+ </td>
+ <td valign="top">0.86 (75th percentile)<br>
+ </td>
+ <td valign="top">0.71 (90th percentile)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">AEXP<br>
+ </td>
+ <td valign="top">Applications experience<br>
+ </td>
+ <td valign="top">1.29 (&lt;= 4 months experience)<br>
+ </td>
+ <td valign="top">1.13 (1 year)<br>
+ </td>
+ <td valign="top">1.00 (3 years)<br>
+ </td>
+ <td valign="top">0.91 (6 years)<br>
+ </td>
+ <td valign="top">0.82 (12 years)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">PCAP<br>
+ </td>
+ <td valign="top">Programmer capability<br>
+ </td>
+ <td valign="top">1.42 (15th percentile)<br>
+ </td>
+ <td valign="top">1.17 (35th percentile)<br>
+ </td>
+ <td valign="top">1.00 (55th percentile)<br>
+ </td>
+ <td valign="top">0.86 (75th percentile)<br>
+ </td>
+ <td valign="top">0.70 (90th percentile)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">VEXP<br>
+ </td>
+ <td valign="top">Virtual machine experience<br>
+ </td>
+ <td valign="top">1.21 (&lt;= 1 month experience)<br>
+ </td>
+ <td valign="top">1.10 (4 months)<br>
+ </td>
+ <td valign="top">1.00 (1 year)<br>
+ </td>
+ <td valign="top">0.90 (3 years)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">LEXP<br>
+ </td>
+ <td valign="top">Programming language experience<br>
+ </td>
+ <td valign="top">1.14 (&lt;= 1 month experience)<br>
+ </td>
+ <td valign="top">1.07 (4 months)<br>
+ </td>
+ <td valign="top">1.00 (1 year)<br>
+ </td>
+ <td valign="top">0.95 (3 years)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">MODP<br>
+ </td>
+ <td valign="top">Use of "modern" programming practices (e.g. structured programming)<br>
+ </td>
+ <td valign="top">1.24 (No use)<br>
+ </td>
+ <td valign="top">1.10<br>
+ </td>
+ <td valign="top">1.00 (some use)<br>
+ </td>
+ <td valign="top">0.91<br>
+ </td>
+ <td valign="top">0.82 (routine use)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">TOOL<br>
+ </td>
+ <td valign="top">Use of software tools<br>
+ </td>
+ <td valign="top">1.24<br>
+ </td>
+ <td valign="top">1.10<br>
+ </td>
+ <td valign="top">1.00 (basic tools)<br>
+ </td>
+ <td valign="top">0.91 (test tools)<br>
+ </td>
+ <td valign="top">0.83 (requirements, design, management, documentation tools)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">SCED<br>
+ </td>
+ <td valign="top">Required development schedule<br>
+ </td>
+ <td valign="top">1.23 (75% of nominal)<br>
+ </td>
+ <td valign="top">1.08 (85% of nominal)<br>
+ </td>
+ <td valign="top">1.00 (nominal)<br>
+ </td>
+ <td valign="top">1.04 (130% of nominal)<br>
+ </td>
+ <td valign="top">1.10 (160% of nominal)<br>
+ </td>
+ <td valign="top"><br>
+ </td>
+ </tr>
+
+
+
+
+ </tbody>
+</table>
+<br>
+<br>
+<br>
+So, once all of the factors have been multiplied together, you can
+then use the "--effort" flag to pass in the corrected factor and exponent.<br>
+<br>
+For example, imagine that you're examining a fairly simple application that
+meets the "organic" requirements. &nbsp;Organic projects have a base factor
+of 2.3 and an exponent of 1.05, as noted above. &nbsp;We then examine all the
+cost drivers to determine a corrected effort factor. &nbsp;For this example, imagine
+that we determine the values of these cost drivers to be as follows:<br>
+<br>
+<table cellpadding="2" cellspacing="2" border="1" width="100%">
+
+ <tbody>
+ <tr>
+ <td valign="top" rowspan="1" colspan="2">Cost Drivers<br>
+ </td>
+ <td valign="top" rowspan="1" colspan="2">Ratings<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">ID<br>
+ </td>
+ <td valign="top">Driver Name<br>
+ </td>
+ <td valign="top">Rating<br>
+ </td>
+ <td valign="top">Multiplier<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">RELY<br>
+ </td>
+ <td valign="top">Required software reliability<br>
+ </td>
+ <td valign="top">Low - easily recovered losses<br>
+ </td>
+ <td valign="top">0.88<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">DATA<br>
+ </td>
+ <td valign="top">Database size<br>
+ </td>
+ <td valign="top">Low<br>
+ </td>
+ <td valign="top">0.94<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">CPLX<br>
+ </td>
+ <td valign="top">Product complexity<br>
+ </td>
+ <td valign="top">Nominal<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">TIME<br>
+ </td>
+ <td valign="top">Execution time constraint<br>
+ </td>
+ <td valign="top">Nominal<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">STOR<br>
+ </td>
+ <td valign="top">Main storage constraint<br>
+ </td>
+ <td valign="top">Nominal<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">VIRT<br>
+ </td>
+ <td valign="top">Virtual machine (HW and OS) volatility<br>
+ </td>
+ <td valign="top">Low (major change every 12 months, minor every month)<br>
+ </td>
+ <td valign="top">0.87<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">TURN<br>
+ </td>
+ <td valign="top">Computer turnaround time<br>
+ </td>
+ <td valign="top">Low (interactive)<br>
+ </td>
+ <td valign="top">0.87<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">ACAP<br>
+ </td>
+ <td valign="top">Analyst capability<br>
+ </td>
+ <td valign="top">Nominal (55th percentile)<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">AEXP<br>
+ </td>
+ <td valign="top">Applications experience<br>
+ </td>
+ <td valign="top">Nominal (3 years)<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">PCAP<br>
+ </td>
+ <td valign="top">Programmer capability<br>
+ </td>
+ <td valign="top">Nominal (55th percentile)<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">VEXP<br>
+ </td>
+ <td valign="top">Virtual machine experience<br>
+ </td>
+ <td valign="top">High (3 years)<br>
+ </td>
+ <td valign="top">0.90<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">LEXP<br>
+ </td>
+ <td valign="top">Programming language experience<br>
+ </td>
+ <td valign="top">High (3 years)<br>
+ </td>
+ <td valign="top">0.95<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">MODP<br>
+ </td>
+ <td valign="top">Use of "modern" programming practices (e.g. structured programming)<br>
+ </td>
+ <td valign="top">High (Routine use)<br>
+ </td>
+ <td valign="top">0.82<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">TOOL<br>
+ </td>
+ <td valign="top">Use of software tools<br>
+ </td>
+ <td valign="top">Nominal (basic tools)<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="top">SCED<br>
+ </td>
+ <td valign="top">Required development schedule<br>
+ </td>
+ <td valign="top">Nominal<br>
+ </td>
+ <td valign="top">1.00<br>
+ </td>
+ </tr>
+
+
+
+
+ </tbody>
+</table>
+<br>
+By multiplying these driver values together in this example, we compute:<br>
+<pre>0.88*0.94*1*1*1*0.87*0.87*1*1*1*0.90*0.95*0.82*1*1</pre>
+The correction from these is 0.438964094, which you multiply by the base
+factor (2.3 in this case) to determine a final effort factor. &nbsp;For this
+example, the final factor for the effort calculation is 1.01. You would then
+invoke sloccount with "--effort 1.01 1.05" to pass in the corrected factor
+and exponent. &nbsp;You don't need to use "--schedule" to set the schedule factors
+(they default to the values for the organic model), but you can set them manually
+anyway with "--schedule 2.5 0.38". &nbsp;You <i>do</i> need to use the --schedule option for embedded and semidetached projects. &nbsp;The final command would be:<br>
+<br>
+sloccount --effort 1.01 1.05 --schedule 2.5 0.38 my_project<br>
+<br>
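+If you repeat this calculation for several projects, the arithmetic is easy to script.
+&nbsp;Here is a minimal Perl sketch (not part of SLOCCount itself - the numbers are just
+the example ratings from the table above) that multiplies the cost drivers, applies the
+organic base factor, and prints the resulting sloccount invocation:<br>
+<pre>
+#!/usr/bin/perl
+# Sketch: compute an intermediate COCOMO effort factor from a base factor
+# and a list of cost-driver multipliers, then print a sloccount command.
+my $base_factor = 2.3;   # intermediate COCOMO, organic mode
+my $exponent    = 1.05;
+my @drivers = (0.88, 0.94, 1, 1, 1, 0.87, 0.87, 1, 1, 1, 0.90, 0.95, 0.82, 1, 1);
+
+my $correction = 1;
+$correction *= $_ foreach @drivers;   # product of all cost-driver multipliers
+
+my $effort_factor = sprintf("%.2f", $base_factor * $correction);
+print "corrected effort factor: $effort_factor\n";
+print "sloccount --effort $effort_factor $exponent --schedule 2.5 0.38 my_project\n";
+</pre>
+<br>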
+<br>
+<br>
+<br>
+<br>
+<br>
+</body></html> \ No newline at end of file
diff --git a/tcl_count b/tcl_count
new file mode 100755
index 0000000..f892692
--- /dev/null
+++ b/tcl_count
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+generic_count '#' "$@"
+
diff --git a/testcode/conditions.CBL b/testcode/conditions.CBL
new file mode 100644
index 0000000..8e12724
--- /dev/null
+++ b/testcode/conditions.CBL
@@ -0,0 +1,31 @@
+ $ SET SOURCEFORMAT"FREE"
+IDENTIFICATION DIVISION.
+PROGRAM-ID. Conditions.
+AUTHOR. Michael Coughlan.
+* An example program demonstrating the use of
+* condition names (level 88's).
+* The EVALUATE and PERFORM verbs are also used.
+
+DATA DIVISION.
+WORKING-STORAGE SECTION.
+01 Char PIC X.
+ 88 Vowel VALUE "a", "e", "i", "o", "u".
+ 88 Consonant VALUE "b", "c", "d", "f", "g", "h"
+ "j" THRU "n", "p" THRU "t", "v" THRU "z".
+ 88 Digit VALUE "0" THRU "9".
+ 88 ValidCharacter VALUE "a" THRU "z", "0" THRU "9".
+
+PROCEDURE DIVISION.
+Begin.
+ DISPLAY "Enter lower case character or digit. No data ends.".
+ ACCEPT Char.
+ PERFORM UNTIL NOT ValidCharacter
+ EVALUATE TRUE
+ WHEN Vowel DISPLAY "The letter " Char " is a vowel."
+ WHEN Consonant DISPLAY "The letter " Char " is a consonant."
+ WHEN Digit DISPLAY Char " is a digit."
+ WHEN OTHER DISPLAY "problems found"
+ END-EVALUATE
+ END-PERFORM
+ STOP RUN.
+
diff --git a/testcode/hello.f b/testcode/hello.f
new file mode 100644
index 0000000..f66fe77
--- /dev/null
+++ b/testcode/hello.f
@@ -0,0 +1,10 @@
+c Hello World
+* Hello World
+! Hello World
+ program hello
+ implicit none
+ print '("Hello, World!")'
+ end
+ ! a fancy comment
+!hpf$ not a comment
+!omp$ not a comment either
diff --git a/testcode/hello.f90 b/testcode/hello.f90
new file mode 100644
index 0000000..6b26a2e
--- /dev/null
+++ b/testcode/hello.f90
@@ -0,0 +1,7 @@
+! Hello World
+program hello
+ implicit none
+ print '("Hello, World!")'
+end program hello
+!hpf$ not a comment
+!omp$ not a comment either
diff --git a/testcode/hello.pas b/testcode/hello.pas
new file mode 100644
index 0000000..40c6005
--- /dev/null
+++ b/testcode/hello.pas
@@ -0,0 +1,9 @@
+{ Hello World in Pascal, for testing SLOCCount.
+ This is multi-line, testing curly braces. }
+(* This is another multi-line comment.
+ Here's another line. *)
+program Hello;
+begin (* Main *)
+ writeln ('Hello, world.')
+end. (* Main *)
+
diff --git a/testcode/hello1.pas b/testcode/hello1.pas
new file mode 100644
index 0000000..c53c0d2
--- /dev/null
+++ b/testcode/hello1.pas
@@ -0,0 +1,12 @@
+{ Hello World in Pascal, for testing SLOCCount.
+ This is multi-line, testing curly braces. }
+(* This is another multi-line comment.
+ Here's another line. *)
+(* This is { another } test. **)
+program Hello;
+begin (* Main *)
+ writeln ('Hello, world.');
+ writeln ('It''s a test!');
+ writeln ('Show that newlines are detected')
+end. (* Main *)
+
diff --git a/testcode/messages.rb b/testcode/messages.rb
new file mode 100644
index 0000000..1521ae6
--- /dev/null
+++ b/testcode/messages.rb
@@ -0,0 +1,152 @@
+#!/usr/local/bin/ruby
+# messages.rb - this is a test for the Ruby SLOC counter.
+# You should get 110 SLOC for this file.
+
+# Guru module: private messages among players
+# Copyright (C) 2001, 2002 Josef Spillner, dr_maux@user.sourceforge.net
+# This is used as a test case in SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+# Commands:
+# guru do i have any messages
+# guru tell grubby nice to meet myself :)
+# guru alert grubby
+
+databasedir = ENV['HOME'] + "/.ggz/grubby"
+
+####################################################################################
+
+class GuruMessages
+ def initialize
+ @msg = Array.new
+ @alerts = Array.new
+ end
+ def add(fromplayer, player, message)
+ @entry = Array.new
+ newmessage = (fromplayer + " said: " + message.join(" ")).split(" ")
+ @entry << player << newmessage
+ @msg.push(@entry)
+ print "OK, I make sure he gets the message."
+ $stdout.flush
+ sleep 1
+ end
+ def tell(player)
+ len = @msg.length
+ a = 0
+ for i in 0..len
+ unless @msg[len-i] == nil
+ print @msg[len-i][1][0..@msg[len-i][1].length - 1].join(" ") + "\n" if player == @msg[len-i][0]
+ if player == @msg[len-i][0]
+ @msg.delete_at(len-i)
+ a = 1
+ end
+ end
+ end
+ if a == 0
+ print "Sorry " + player + ", I guess you're not important enough to get any messages."
+ end
+ $stdout.flush
+ sleep 1
+ end
+ def alert(fromplayer, player)
+ @entry = Array.new << fromplayer << player
+ @alerts.push(@entry)
+ print "OK, I alert " + player + " when I see him."
+ $stdout.flush
+ sleep 1
+ end
+ def trigger(player)
+ len = @alerts.length
+ a = 0
+ for i in 0..len
+ unless @alerts[len-i] == nil
+ if player == @alerts[len-i][0]
+ print player + ": ALERT from " + @alerts[len-i][1] + "\n"
+ @alerts.delete_at(len-i)
+ a = 1
+ end
+ end
+ end
+ if a == 1
+ $stdout.flush
+ sleep 1
+ return 1
+ end
+ return 0
+ end
+end
+
+input = $stdin.gets.chomp.split(/\ /)
+
+mode = 0
+if (input[1] == "do") && (input[2] == "i") && (input[3] == "have") &&
+ (input[4] == "any") && (input[5] == "messages")
+ mode = 1
+ player = ARGV[0]
+end
+if (input[1] == "tell")
+ mode = 2
+ fromplayer = ARGV[0]
+ player = input[2]
+ message = input[3..input.length]
+end
+if(input[1] == "alert")
+ mode = 3
+ fromplayer = ARGV[0]
+ player = input[2]
+end
+
+m = nil
+begin
+ File.open(databasedir + "/messages") do |f|
+ m = Marshal.load(f)
+ end
+rescue
+ m = GuruMessages.new
+end
+
+if mode == 0
+ ret = m.trigger ARGV[0]
+ if ret == 0
+ exit
+ end
+end
+if mode == 1
+ if player != nil
+ m.tell player
+ else
+ print "If you mind telling me who you are?"
+ $stdout.flush
+ sleep 1
+ end
+end
+if mode == 2
+ m.add fromplayer, player, message
+end
+if mode == 3
+ m.alert fromplayer, player
+end
+
+File.open(databasedir + "/messages", "w+") do |f|
+ Marshal.dump(m, f)
+end
+
diff --git a/testcode/temp.c b/testcode/temp.c
new file mode 100644
index 0000000..d540f08
--- /dev/null
+++ b/testcode/temp.c
@@ -0,0 +1,5 @@
+
+
+main() {
+ int i;
+}
diff --git a/testcode/test.hs b/testcode/test.hs
new file mode 100644
index 0000000..de874df
--- /dev/null
+++ b/testcode/test.hs
@@ -0,0 +1,19 @@
+
+-- This literate program prompts the user for a number
+-- and prints the factorial of that number:
+
+{- This is a comment. -}
+{- This is a comment,
+ too -}
+
+{-# this is a pragma, COUNT IT -}
+
+ main :: IO ()
+ main = do putStr "Enter a number: "
+ l <- readLine
+ putStr "n!= "
+ print (fact (read l))
+ fact :: Integer -> Integer
+ fact 0 = 1
+ fact n = n * fact (n-1)
+
diff --git a/testcode/test1.inc b/testcode/test1.inc
new file mode 100644
index 0000000..a56d14e
--- /dev/null
+++ b/testcode/test1.inc
@@ -0,0 +1,23 @@
+<?php
+
+ /**
+ * Test file for php_count, part of SLOCCount. This is a C-style comment.
+ * This file is different from .php.
+ */
+
+ // This is a C++-style comment.
+
+ # This is a shell-style comment.
+
+ # Here are 9 lines of code:
+
+ function get()
+ {
+ $total = 0;
+ $simplestring = 'hello';
+ $simplestring = '\\hello\'';
+ $funkystring = "hello";
+ $funkystring = "$hi\\\"";
+ return 0;
+ }
+?>
diff --git a/testcode/test1.lhs b/testcode/test1.lhs
new file mode 100644
index 0000000..3c19a70
--- /dev/null
+++ b/testcode/test1.lhs
@@ -0,0 +1,15 @@
+\documentstyle{article}
+
+\begin{document}
+
+\section{Introduction}
+
+This is a trivial program that prints the first 20
+factorials. It should have 2 lines of code.
+
+\begin{code}
+main :: IO ()
+main = print [ (n, product [1..n]) | n <- [1..20]]
+\end{code}
+
+\end{document}
diff --git a/testcode/test1.php b/testcode/test1.php
new file mode 100644
index 0000000..9fd2510
--- /dev/null
+++ b/testcode/test1.php
@@ -0,0 +1,27 @@
+<?php
+
+ /**
+ * Test file for php_count, part of SLOCCount. This is a C-style comment.
+ */
+
+ // This is a C++-style comment.
+
+ # This is a shell-style comment.
+
+ # Here are 13 lines of code:
+
+ function get()
+ {
+ $total = 0;
+ $simplestring = 'hello';
+ $simplestring = '\\hello\'';
+ $funkystring = "hello";
+ $funkystring = "$hi\\\"";
+ $heretest <<< wiggle
+juggle
+ wiggle /* This doesn't end the string, so this isn't a C comment.
+wiggle;
+ return 0;
+ }
+
+?>
diff --git a/testcode/test2.lhs b/testcode/test2.lhs
new file mode 100644
index 0000000..6e39905
--- /dev/null
+++ b/testcode/test2.lhs
@@ -0,0 +1,44 @@
+
+This is an extract of a larger literate Haskell file for testing
+SLOCCount. It should have 21 lines of code.
+
+This dumps the tree in dot format, which is very handy for visualizing
+the trees.
+
+> dotTree name t = "digraph " ++ filter dotChars name ++ " { " ++ (dotTree' t 0) ++ " }"
+
+> dotTree' Empty _ = ""
+> dotTree' t i | is_leaf t = "n"++(show i)++" [label=\""++(show $ x_span t)++
+> "\",shape=box]; "
+> | otherwise = "n"++(show i)++" [label=\""++(show $ x_span t)++"\"]; " ++
+> "n"++(show i)++" -> n"++(show (2*i+1))++"; "++
+> "n"++(show i)++" -> n"++(show (2*i+2))++"; "++
+> dotTree' (left t) (2*i+1) ++
+> dotTree' (right t) (2*i+2)
+> where is_leaf Node { left = Empty, right = Empty } = True
+> is_leaf _ = False
+> {- this is a comment
+
+foo bar baz
+
+> that
+> spans literate blocks -}
+
+> dotChars '.' = False
+> dotChars '/' = False
+> dotChars _ = True
+
+These functions fill in the monotonically increasing index values for
+the lines in the finite map. They also do appropriate things to combine
+the world values.
+
+> idxList [] n = []
+> idxList (x:xs) n = (x {idx=n}):(idxList xs (n+1))
+
+> idxFM' fm (x,k) = addToFM (delFromFM fm k) k (y {idx=toInteger x})
+> where y = case lookupFM fm k of
+> Just foo -> foo
+> Nothing -> error $ "No such key: " ++ show k
+
+> idxFM fm = foldl idxFM' fm (zip [1..sizeFM fm] $ keysFM fm)
+
diff --git a/testcode/wokka.cbl b/testcode/wokka.cbl
new file mode 100644
index 0000000..d7ccd0c
--- /dev/null
+++ b/testcode/wokka.cbl
@@ -0,0 +1,4 @@
+ * Comment.
+ IDENTIFICATION DIVISION.
+ PROGRAM-ID. Conditions.
+
diff --git a/testcode/wokka.cs b/testcode/wokka.cs
new file mode 100644
index 0000000..fa95425
--- /dev/null
+++ b/testcode/wokka.cs
@@ -0,0 +1,8 @@
+
+/* comment: This has 5 physical lines of code. */
+
+class Test {
+ static void Main() {
+ System.Console.WriteLine("Hello, World (in C#)");
+ }
+}
diff --git a/usc_subset.tar b/usc_subset.tar
new file mode 100644
index 0000000..67e2d5a
--- /dev/null
+++ b/usc_subset.tar
@@ -0,0 +1 @@
+java_lines.c