#!/bin/sh

#########################################################################
#                                                                       #
#                                 OCaml                                 #
#                                                                       #
#            Damien Doligez, projet Gallium, INRIA Rocquencourt         #
#                                                                       #
#   Copyright 2012 Institut National de Recherche en Informatique et    #
#     en Automatique.  All rights reserved.  This file is distributed   #
#    under the terms of the Q Public License version 1.0.               #
#                                                                       #
#########################################################################

# check-typo - Check typographic conventions on OCaml sources.

# This program will check files for the following rules:

# - absence of TAB characters (tab)
# - absence of non-ASCII characters (non-ascii)
# - absence of non-printing ASCII characters (non-printing)
# - absence of white space at end of line (white-at-eol)
# - absence of empty lines at end of file (white-at-eof)
# - presence of a LF character at the end of the file (missing-lf)
# - maximum line length of 80 characters (long-line)
# - maximum line length of 132 characters (very-long-line)
# - presence of a copyright header (missing-header)
# - absence of a leftover "$Id" string (svn-keyword)

# Exceptions are handled with a git attribute: "ocaml-typo".
# Its value for a given file is a comma-separated list of rule names,
# which lists the rules that should be disabled for this file.
# The rule names are the ones shown above in parentheses.

# Built-in exceptions:
# - Any binary file (i.e. with git attribute "binary")
#   is automatically exempt from all the rules.
# - Any file whose name matches one of the following patterns is
#   automatically exempt from all rules
#     *.reference
#     */reference
#     */.depend*
# - Any file whose name begins with "Makefile" is automatically exempt
#   from the "tab" rule.
# - Any file whose name matches one of the following patterns is
#   automatically exempt from the "missing-header" rule.
#     *.mlpack
#     *.mllib
#     *.mltop
#     *.odocl
#     *.itarget
#     *.clib
# - Any file whose name matches the following pattern is automatically
#   exempt from the "long-line" rule (but not from "very-long-line").
#     */ocamldoc/*

# ASCII characters are bytes from 0 to 127.  Any other byte is
# flagged as a non-ASCII character.

# For the purpose of this tool, printing ASCII characters are:
# - the non-white printable ASCII characters (33 to 126)
# - TAB (09)
# - LF (10)
# - SPC (32)
# Anything else is flagged as a non-printing ASCII character.

# This program will recursively explore the files and directories given
# on the command line (or by default the current directory), and check
# every file therein for compliance to the rules.

# Directories named .git (and their contents) are always ignored.
# This program ignores any file that is not under git control, unless
# explicitly given on the command line.

# If a directory has the git attribute "ocaml-typo" set to "prune",
# then it and its contents are ignored.

# You can ignore a rule by giving the option -<rule> on the command
# line (before any file names).  A "--" argument ends the options.

# First prevent i18n from messing up everything.
export LC_ALL=C

# Special case for the recursive call made by find (see list_files).
# Exit status 0 makes find prune directory $2; any other status makes
# find descend into it.
case "$1" in
  --check-prune)
    case "$(git check-attr ocaml-typo "$2" 2>/dev/null)" in
      *prune*)
        echo "INFO: pruned directory $2 (ocaml-typo=prune)" >&2
        exit 0;;
      *) exit 3;;
    esac;;
esac

# Print the usage message on stderr and exit with status 2.
usage () {
  echo "usage: check-typo {-<rule>} [--] {<file>}" >&2
  exit 2
}

# Comma-terminated list of the rules disabled on the command line.
userrules=''

while : ; do
  case "$1" in
    -help|--help) usage;;
    # "--" must be tested before "-*", which would otherwise swallow it.
    --) shift; break;;
    -*) userrules="${1#-},$userrules"; shift;;
    *) break;;
  esac
done

# Print, one per line, every regular file found under $1.
# Directories named .git are skipped, as is any directory for which the
# recursive "--check-prune" call of this script exits with status 0.
list_files () {
  find "$1" \
    -name .git -prune -o \
    -type d -exec "$0" --check-prune {} \; -prune -o \
    -type f -print
}

case $# in
  0) list_files .;;
  *) for i in "$@"; do list_files "$i"; done;;
esac | while IFS= read -r f; do
  # IFS= and -r keep backslashes and leading/trailing blanks of the name.

  # Despite its name, is_svn records whether git knows the file.  Any
  # output of git ls-files, error messages included, counts as "known".
  case "$(git ls-files "$f" 2>&1)" in
    "") is_svn=false;;
    *) is_svn=true;;
  esac

  # A file counts as given on the command line when its name occurs
  # literally in the joined arguments ("$f" is quoted so that glob
  # characters in the name are not interpreted as a pattern).
  case "$*" in
    *"$f"*) is_cmd_line=true;;
    *) is_cmd_line=false;;
  esac

  if $is_svn || $is_cmd_line; then :; else continue; fi

  # Rules disabled for this file through the ocaml-typo git attribute.
  svnrules=''
  if $is_svn; then
    case "$(git check-attr binary "$f")" in
      *'binary: set') continue;;
    esac
    svnrules=$(git check-attr ocaml-typo "$f" | sed -e 's/.*: //')
    case "$svnrules" in
      unspecified) svnrules= ;;
    esac
  fi

  # Rules disabled for this file: command-line ones plus the built-in
  # exceptions that depend on the file name.
  rules="$userrules"
  case "$f" in
    Makefile*|*/Makefile*) rules="tab,$rules";;
    *.mlpack|*.mllib|*.mltop|*.odocl|*.itarget|*.clib)
      rules="missing-header,$rules";;
    *.reference|*/reference|*/.depend*) continue;;
  esac
  case "$f" in
    ocamldoc/*|*/ocamldoc/*) rules="long-line,$rules";;
  esac

  # The extra newline from echo lets the END block of the awk program
  # tell apart a missing final LF (last record non-empty) from a
  # correctly terminated file (last record empty).
  { cat "$f"; echo; } \
  | awk -v rules="$rules" -v svnrules="$svnrules" -v file="$f" '
      # Record one violation of rule "name" and report it unless the
      # rule is disabled or 10 reports were already made for it.
      # Violations are counted even when the rule is disabled, so that
      # the END block can detect ocaml-typo entries that are not needed.
      # The reported column is RSTART + RLENGTH.
      function err(name, msg) {
        ++counts[name];
        if (("," rules svnrules ",") !~ ("[, ]" name "[, ]") &&
            counts[name] <= 10){
          printf ("%s:%d.%d:", file, NR, RSTART + RLENGTH);
          printf (" [%s] %s\n", name, msg);
          if (counts[name] == 10){
            printf ("WARNING: too many [%s] in this file.", name);
            printf (" Others will not be reported.\n");
          }
        }
      }

      match($0, /\t/) {
        err("tab", "TAB character(s)");
      }

      match($0, /[\200-\377]/) {
        err("non-ascii", "non-ASCII character(s)");
      }

      match($0, /[^\t\200-\377 -~]/) {
        err("non-printing", "non-printing ASCII character(s)");
      }

      match($0, /[ \t]+$/) {
        err("white-at-eol", "whitespace at end of line");
      }

      match($0, /\$Id(: .*)?\$/) {
        err("svn-keyword", "SVN keyword marker");
      }

      length($0) > 80 {
        RSTART = 81;
        RLENGTH = 0;
        err("long-line", "line is over 80 characters");
      }

      length($0) > 132 {
        RSTART = 133;
        RLENGTH = 0;
        err("very-long-line", "line is over 132 characters");
      }

      # The header must name the project on one of lines 3 to 5 ...
      3 <= NR && NR <= 5 &&
      (/ OCaml / || / ocamlbuild / || / OCamldoc /) {
        header_ocaml = NR;
      }

      # ... and carry a copyright notice 4 to 6 lines below that.
      header_ocaml && header_ocaml + 4 <= NR && NR <= header_ocaml + 6 &&
      / Copyright / {
        header_copyright = 1;
      }

      {
        prev_line = last_line;
        last_line = $0;
      }

      END {
        if (match(last_line, /.+/)){
          # The last record is not the newline added by echo alone.
          err("missing-lf", "missing linefeed at EOF");
          prev_line = last_line;
          ++NR;
          empty_file = 0;
        }else{
          empty_file = NR == 1;
        }
        if (!empty_file && match(prev_line, /^$/)){
          err("white-at-eof", "empty line(s) at EOF");
        }
        # Whole-file problems are reported at position 1.1.
        NR = 1;
        RSTART = 1;
        RLENGTH = 0;
        if (!(header_ocaml && header_copyright)){
          err("missing-header", "missing copyright header");
        }
        split(svnrules, r, "[, ]");
        for (i in r){
          name = r[i];
          if (name != "" && !counts[name]){
            err("unused-prop", sprintf("unused [%s] in ocaml-typo", name));
          }
        }
      }
    '
done