#! /bin/bash
# $Id$
#
# This script is installed as a cron job to automatically update the
# Docutils web site whenever the SVN files change.  Any .html document
# with a corresponding .txt file is regenerated whenever the .txt
# changes.
#
# Options:
#   -f    Do not give feedback.
#   -t    Run the script in trace mode ("set -o xtrace").
#   -u    Regenerate .html unconditionally.
#   -v    Run verbosely.
#
# Prerequisites:
#
# - Checked out trunk at $snapshotdir.
# - Checked out main tree at $lib.

# Abort the run as soon as any command fails.
set -o errexit

# Everything created from here on is group-writable.
umask 0002

project="docutils"
basedir="/home/groups/docutils/htdocs"

# Non-public working area.
auxdir="${basedir}/aux"
# Destination for htdocs.  It will be moved to another server later,
# so it is kept non-public (under $auxdir).
htdocsdest="${auxdir}/htdocs"
# Snapshots are created here (non-public) ...
snapshotdir="${auxdir}/snapshots"
# ... and published here (public).
snapshotdest="/home/groups/ftp/pub/docutils"
bindir="${auxdir}/bin"
# Current and new htdocs tarball, and the patch file between them.
htdocs_patchfile="${auxdir}/htdocs.patch"
htdocs_tarball="${auxdir}/htdocs.tar"
htdocs_new_tarball="${auxdir}/htdocs.new.tar"
# Python library area and the project tree inside it.
pylib="${auxdir}/lib/python"
lib="${pylib}/${project}"
# Lock directory.
lockdir="${auxdir}/lock"

# Project and htdocs directories on SF.net.
remoteproject="/home/groups/d/do/docutils"
remotehtdocs="${remoteproject}/htdocs"

# SSH settings.  The identity file is read from the lock directory.
# $sshcommand is meant to be expanded unquoted, so the embedded
# newline and spaces only separate words.
sshdir="${auxdir}/.ssh"
sshhost="docutilsupdate@shell.sourceforge.net"
sshcommand="ssh -i ${lockdir}/id_dsa
            -o UserKnownHostsFile=${sshdir}/known_hosts ${sshhost}"

# Project base URL (for sitemap) without trailing slash.
baseurl="http://docutils.sourceforge.net"

PYTHONPATH="${pylib}:${lib}:${lib}/extras"
PATH="${lib}/tools:${PATH}"
export PYTHONPATH PATH

# Option state.  These are 0/1 flags, except $feedback, which is either
# "1" or empty so that it can be checked with a bare "test $feedback".
feedback=1
trace=0
unconditional=0
verbose=0

# Parse the command line; an unknown option ends the run with status 2.
while getopts ftuv opt; do
    case $opt in
        f)  feedback= ;;
        t)  trace=1 ;;
        u)  unconditional=1 ;;
        v)  verbose=1 ;;
        \?) exit 2 ;;
    esac
done
# Drop the options that have just been parsed.
shift $((OPTIND - 1))

if test $feedback; then
    echo 'Starting docutils-update run...' || true
fi

# Both -t and -v switch command tracing on.
if [ $trace -eq 1 ] || [ $verbose -eq 1 ]; then
    set -o xtrace
fi

# Acquire the lock.  The lock is a directory; mkdir fails if it is
# already there.
if ! mkdir "$lockdir"; then
    printf '%s\n' \
        '' \
        'Could not create lock directory at' \
        "$lockdir" \
        '' \
        'Please ensure no other user is running this script' \
        'and delete the directory.'
    exit 1
fi
# Always clean up: on exit and on HUP, INT, QUIT and TERM (conditions
# 0 1 2 3 15) the lock directory is removed and the script leaves with
# status 1.  $lockdir is expanded now, while the trap is being set.
trap "rm -rf $lockdir; trap - 0; exit 1" EXIT HUP INT QUIT TERM
# The lock directory has to stay deletable (i.e. rwx) by other group
# members, in case this script crashes after copying files into it,
# and unreadable by the world, because the SSH key will be stored in
# it.
chmod 0770 "$lockdir"


# Bring the library area up to date.
cd "$lib"
svn up --quiet

# -------------------- Snapshots: --------------------

# Gather the materials.  __init__.py is reverted first so that the
# update starts without local modifications to that file.
cd "$snapshotdir"
init_py=$project/$project/__init__.py
svn -q revert "$init_py"
# Everything "svn up" prints except "At revision ..." lines; an empty
# value means nothing was reported.
haschanges="$(svn up docutils sandbox web | grep -v '^At revision '; true)"

# Stamp the snapshot date and revision into __version_details__.
version_details="snapshot $(date --utc --iso), r$(svn info docutils | grep ^Revision: | sed 's/^Revision: //')"
printf '%s\n' \
    ",s/^__version_details__ = .*\$/__version_details__ = '$version_details'/" \
    wq \
    | ed "$init_py" 2> /dev/null

# Set directory permissions so that several users can modify the
# files.  File permissions are left alone; that is probably not
# necessary because files can be deleted and re-created.  The aux
# directory itself is skipped to keep it non-public, but all of its
# subdirectories are changed.
#find $basedir -type f -print0 | xargs -0 chmod ug+rw 2> /dev/null || true
find "$basedir" -name aux -o -type d -print0 \
    | xargs -0 chmod ug+rwxs 2> /dev/null \
    || true

# Create the snapshots (without the .svn directories).
exclude='--exclude=.svn'
tar -cz $exclude -f "$project-snapshot.tgz" "$project"
for area in sandbox web; do
    tar -cz $exclude -f "$project-$area-snapshot.tgz" "$area"
done
(
    cd sandbox/gschwant
    tar -cz $exclude -f ../../docfactory-snapshot.tgz docfactory
)

# Plant the snapshots.
mv -f *snapshot.tgz "$snapshotdest"

# Revert the version stamp and set the file's timestamp back to its
# last-changed date, so that the web site is not updated merely
# because of a changed timestamp.
svn -q revert "$init_py"
last_changed="$(svn info "$init_py" | grep 'Last Changed Date:' | sed 's/[^:]*: //')"
touch "$init_py" --date "$last_changed"

# -------------------- htdocs: --------------------

cd "$snapshotdir"

# Copy the regular files and symbolic links found below the given
# paths into $htdocsdest.
#
# Globals:   htdocsdest (read) - destination directory.
# Arguments: one or more paths, relative to the current directory.
# Returns:   the exit status of xargs; 0 when there was nothing to copy.
#
# .svn directories are pruned.  cp keeps each source path below the
# destination (--parents), copies symbolic links as links
# (--no-dereference) and skips files whose destination copy is not
# older than the source (--update).
copy_to_htdocsdest() {
    # --no-run-if-empty: without it, an empty file list makes xargs run
    # cp with no operands, which fails and aborts the script under
    # "set -e".
    find "$@" -type d -name .svn -prune -o \( -type f -o -type l \) -print0 | \
        xargs -0 --no-run-if-empty cp --no-dereference --update --parents \
            --target-directory="$htdocsdest" --
}

# update htdocs
copy_to_htdocsdest sandbox
(cd "$project"; copy_to_htdocsdest *)
(cd web; copy_to_htdocsdest * .[^.]*)

# update HTML docs
cd "$htdocsdest/tools"

# The loops below are noisy; keep tracing them only when -t was given.
if [ "$trace" -eq 0 ] ; then
    set +o xtrace
fi

# The find results are collected NUL-delimited into arrays before they
# are processed.  This keeps paths containing whitespace intact, and
# the loop bodies run only after find has finished and with their
# normal stdin.

# Run every Makefile.docutils-update in its own directory.
makefiles=()
while IFS= read -r -d '' makefile ; do
    makefiles+=("$makefile")
done < <(find .. -name Makefile.docutils-update -print0)

for makefile in "${makefiles[@]}" ; do
    dir=$(dirname "$makefile")
    ( cd "$dir" ; make -f Makefile.docutils-update -s )
done

# Regenerate each .html file that has a .txt file next to it when the
# .txt is newer, or always when -u was given.  Files whose name starts
# with "pep-" go through rstpep2html.py, all others through
# rst2html.py.  $haschanges is set when anything was regenerated.
htmlfiles=()
while IFS= read -r -d '' htmlfile ; do
    htmlfiles+=("$htmlfile")
done < <(find .. -name '*.html' -print0)

for htmlfile in "${htmlfiles[@]}" ; do
    dir=$(dirname "$htmlfile")
    base=$(basename "$htmlfile" .html)
    txtfile=$dir/$base.txt
    [ -e "$txtfile" ] || continue
    if [ "$unconditional" -eq 1 ] || [ "$txtfile" -nt "$htmlfile" ] ; then
        if [ "${base:0:4}" == "pep-" ] ; then
            converter=rstpep2html.py
            message="$txtfile (PEP)"
        else
            converter=rst2html.py
            message=$txtfile
        fi
        test $feedback && echo "$message" || true
        python "$lib/tools/$converter" --config="$dir/docutils.conf" \
            "$txtfile" "$htmlfile"
        haschanges=1
    fi
done

# Back to tracing if -t or -v asked for it.
if [ "$trace" -eq 1 ] || [ "$verbose" -eq 1 ] ; then
    set -o xtrace
fi

# -------------------- XML sitemap for search engines: --------------------

cd "$htdocsdest"

# Rebuilding the sitemap takes a lot of CPU time, so it is done only
# when something has changed.
if [[ -n "$haschanges" ]]; then
    (
        echo '<?xml version="1.0" encoding="UTF-8"?>'
        echo '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'
        if [ "$trace" -eq 0 ] ; then
            set +o xtrace
        fi
        # Entries whose name starts with a dot are pruned.  Directories
        # are printed with a trailing slash, files and symbolic links
        # as they are.  "IFS= read -r" keeps backslashes and
        # leading/trailing blanks in the names intact.
        # NOTE(review): apart from spaces, URLs are written unescaped;
        # names containing "&" or "<" would yield malformed XML.
        find . -name '.[^.]*' -prune -o -type d -printf '%p/\n' \
                -o \( -type f -o -type l \) -print | \
            while IFS= read -r i; do
                # i is the file name.
                if [[ "$i" == "./" ]]; then
                    # Homepage.
                    i=index.html
                    url="$baseurl/"
                elif [[ "$i" == "./sitemap" ||
                        ( "${i: -1}" == "/" && -f "${i}index.html" ) ]]; then
                    # Either the sitemap itself, or a directory that
                    # has an index.html, so we don't need to include
                    # it.
                    continue
                else
                    # Drop the leading "." and encode spaces.
                    url="$baseurl${i:1}"
                    url="${url// /%20}"
                fi
                lastmod="$(date --iso-8601=seconds -u -r "$i")"
                # Google wants a colon in front of the last two digits.
                lastmod="${lastmod::22}:00"
                case "$i" in
                    *.html)
                        # HTML files (including the home page) have
                        # highest priority.
                        priority=1.0 ;;
                    *.txt)
                        # Text files have medium priority.
                        priority=0.5 ;;
                    *)
                        # Everything else (source files etc.) has low
                        # priority.
                        priority=0.2 ;;
                esac
                echo "<url><loc>$url</loc><lastmod>$lastmod</lastmod><priority>$priority</priority></url>"
            done
        if [ "$trace" -eq 1 ] || [ "$verbose" -eq 1 ] ; then
            set -o xtrace
        fi
        echo '</urlset>'
    ) > sitemap
    # sitemap is compressed on the remote site for smaller patch sizes.
fi

# -------------------- Push changes to remote server. --------------------

# SSH refuses to read id_dsa files whose permissions are not 0600.
# To work around this, the key is copied to $lockdir/id_dsa and the
# copy is given 0600 permissions.
cp "$sshdir/id_dsa" "$lockdir/id_dsa"
chmod 0600 "$lockdir/id_dsa"

rm -f "$htdocs_patchfile"
# Pack the current state of htdocs into the new tarball.
cd "$htdocsdest"
tar cf "$htdocs_new_tarball" .

if [ ! -f "$htdocs_tarball" ]; then
    # Without an old tarball there is nothing to diff against, so the
    # whole (gzipped) tarball is transmitted.
    if test $feedback; then
        echo Transmitting entire tarball. || true
    fi
    gzip -c "$htdocs_new_tarball" | $sshcommand \
        "
        set -e
        umask 002
        cd $remoteproject
        gunzip -c > htdocs.tar
        cd $remotehtdocs
        tar xmf $remoteproject/htdocs.tar
        gzip -f sitemap
        "
elif ! diff -q "$htdocs_tarball" "$htdocs_new_tarball" > /dev/null; then
    # Old and new tarball differ: create and transmit a patch file.
    "$bindir/bsdiff" "$htdocs_tarball" "$htdocs_new_tarball" "$htdocs_patchfile"
    if test $feedback; then
        echo "Patch size: $(du -h "$htdocs_patchfile" | sed 's/\t.*//')" || true
    fi
    # Delete the current tarball before uploading.  If uploading or
    # applying the patch goes wrong, the servers are out of sync; the
    # next run will then find no tarball and transfer the whole
    # tarball instead.
    rm -f "$htdocs_tarball"
    # Upload the patch file and apply it remotely.
    $sshcommand \
        "
        set -e
        umask 002
        cd $remoteproject
        cat > htdocs.patch
        ~/bin/bspatch htdocs.tar htdocs.new.tar htdocs.patch
        cd $remotehtdocs
        tar xmf $remoteproject/htdocs.new.tar
        gzip -f sitemap
        cd $remoteproject
        mv htdocs.new.tar htdocs.tar
        rm -f htdocs.patch
        " \
            < "$htdocs_patchfile"
fi
# The new tarball is the reference for the next run.
mv "$htdocs_new_tarball" "$htdocs_tarball"

# Tidy up: remove the patch file, disarm the cleanup trap and release
# the lock.
rm -f "$htdocs_patchfile"
trap - EXIT HUP INT QUIT TERM
rm -rf "$lockdir"
if test $feedback; then
    echo '...docutils-update done.' || true
fi

# Local Variables:
# indent-tabs-mode: nil
# End:
