/* php_count: given a list of C/C++/Java files on the command line, count the SLOC in each one. SLOC = physical, non-comment lines. This program knows about C++ and C comments (and how they interact), and correctly ignores comment markers inside strings. This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). Copyright (C) 2001-2004 David A. Wheeler. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA To contact David A. Wheeler, see his website at: http://www.dwheeler.com. Usage: Use in one of the following ways: php_count # As filter php_count list_of_files # Counts for each file. php_count -f fl # Counts the files listed in "fl". */ #include #include #include #include /* If ALLOW_SHORT_TAGS is true, then = s && isspace(*p); p--) { *p = '\0'; } return; } long sloc_count(char *filename, FILE *stream) { /* Count the sloc in the program in stdin. */ enum mode_t mode = NONE; /* State machine state - NORMAL == PHP code */ enum comment_t comment_type; /* ANSIC_STYLE, CPP_STYLE, SH_STYLE */ enum end_t expected_end; /* The kind of ending expected, e.g. ?> */ char *heredoc_end; sloc = 0; /* The following implements a state machine with transitions; the main state is "mode"; the transitions are triggered by character input. */ while (!is_input_eof) { /* printf("mode=%d, current_char=%c\n", mode, current_char()); */ if (mode == NONE) { /* Note: PHP will raise errors if something starts with works. */ if (match_consume(" should be more flexible, allowing for other attributes etc. I haven't seen this as a real problem. */ } else if (match_consume("", stream)) { mode = NONE; } else if (match_consume("\"", stream)) { sawchar = 1; mode = INSTRING; } else if (match_consume("\'", stream)) { sawchar = 1; mode = INSINGLESTRING; } else if (match_consume("/*", stream)) { mode = INCOMMENT; comment_type = ANSIC_STYLE; } else if (match_consume("//", stream)) { mode = INCOMMENT; comment_type = CPP_STYLE; } else if (match_consume("#", stream)) { mode = INCOMMENT; comment_type = SH_STYLE; } else if (match_consume("<<<", stream)) { mode = HEREDOC; while (isspace(current_char(stream)) && !is_input_eof) {consume_char(stream);} heredoc_end = rest_of_line(stream); strstrip(heredoc_end); } else { if (!isspace(current_char(stream))) sawchar = 1; consume_char(stream); } } else if (mode == INSTRING) { /* We only count string lines with non-whitespace -- this is to gracefully handle syntactically invalid programs. You could argue that multiline strings with whitespace are still executable and should be counted. */ if (!isspace(current_char(stream))) sawchar = 1; if (match_consume("\"", stream)) {mode = NORMAL;} else if (match_consume("\\\"", stream) || match_consume("\\\\", stream) || match_consume("\\\'", stream)) {} else consume_char(stream); } else if (mode == INSINGLESTRING) { /* We only count string lines with non-whitespace; see above. */ if (!isspace(current_char(stream))) sawchar = 1; if (current_char(stream) == '\'') {} if (match_consume("'", stream)) {mode = NORMAL; } else if (match_consume("\\\\", stream) || match_consume("\\\'", stream)) { } else { consume_char(stream); } } else if (mode == INCOMMENT) { if ((comment_type == ANSIC_STYLE) && match_consume("*/", stream)) { mode = NORMAL; } /* Note: in PHP, must accept ending markers, even in a comment: */ else if ((expected_end==NORMAL_END) && match_consume("?>", stream)) { mode = NONE; } else if ((expected_end==ASP_END) && match_consume("%>", stream)) { mode = NONE; } else if ((expected_end==SCRIPT_END) && match_consume("", stream)) { mode = NONE; } else if ( ((comment_type == CPP_STYLE) || (comment_type == SH_STYLE)) && match_consume("\n", stream)) { mode = NORMAL; } else consume_char(stream); } else if (mode == HEREDOC) { if (!isspace(current_char(stream))) sawchar = 1; if (beginning_of_line && match_consume(heredoc_end, stream)) { mode=NORMAL; } else { consume_char(stream); } } else { fprintf(stderr, "Warning! Unknown mode in PHP file %s, mode=%d\n", filename, mode); consume_char(stream); } } if (mode != NONE) { fprintf(stderr, "Warning! Unclosed PHP file %s, mode=%d\n", filename, mode); } return sloc; } void count_file(char *filename) { long sloc; FILE *stream; stream = fopen(filename, "r"); line_number = 0; init_input(stream); sloc = sloc_count(filename, stream); fclose (stream); total_sloc += sloc; printf("%ld %s\n", sloc, filename); } char *read_a_line(FILE *file) { /* Read a line in, and return a malloc'ed buffer with the line contents. Any newline at the end is stripped. If there's nothing left to read, returns NULL. */ /* We'll create a monstrously long buffer to make life easy for us: */ char buffer[10000]; char *returnval; char *newlinepos; returnval = fgets(buffer, sizeof(buffer), file); if (returnval) { newlinepos = buffer + strlen(buffer) - 1; if (*newlinepos == '\n') {*newlinepos = '\0';}; return strdup(buffer); } else { return NULL; } } int main(int argc, char *argv[]) { long sloc; int i; FILE *file_list; char *s; total_sloc = 0; line_number = 0; if (argc <= 1) { init_input(stdin); sloc = sloc_count("-", stdin); printf("%ld %s\n", sloc, "-"); total_sloc += sloc; } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) { if (!strcmp (argv[2], "-")) { file_list = stdin; } else { file_list = fopen(argv[2], "r"); } if (file_list) { while ((s = read_a_line(file_list))) { count_file(s); free(s); } } } else { for (i=1; i < argc; i++) { count_file(argv[i]); } } printf("Total:\n"); printf("%ld\n", total_sloc); exit(0); }