summaryrefslogtreecommitdiff
path: root/tools/apache_count.py
blob: a6d9bb4b35811ab2c142897f721af905496c3bd6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/python

import sys, os, re, psycopg, ConfigParser, urlparse, gzip, bz2
from mx.DateTime import DateTime
from mx.DateTime.Timezone import utc_offset

# Matches a successful (status 200) GET request in an Apache access-log line.
# Named groups: day, month, year, hour, min, sec, zone (the text between the
# seconds field and the closing bracket of the timestamp) and path (the
# requested URI).
# The zone group must not cross the closing ']' of the timestamp; a greedy
# '.*' there could skip ahead to a later '] "GET ... 200' fragment elsewhere
# in the line (e.g. inside a referer or user-agent field) and report the
# wrong zone and path.  The dot in the protocol version is escaped so only
# "HTTP/1.<x>" is accepted.
logre = re.compile(r"\[(?P<day>..)/(?P<month>...)/(?P<year>....):"
                   r"(?P<hour>..):(?P<min>..):(?P<sec>..) "
                   r'(?P<zone>[^\]]*)\] "GET (?P<path>[^ "]+) HTTP/1\.." 200')

# Lower-case three-letter month abbreviations in calendar order.
month_names = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]
# Maps each abbreviation to its 1-based month number ('jan' -> 1, 'dec' -> 12).
month_index = dict(zip(month_names, range(1, 13)))

def _open_log(filename):
    """Open the access log `filename` for line-by-line reading.

    Names ending in ".gz" are opened with gzip, names ending in ".bz2" with
    bz2, and anything else as a plain file.
    """
    if filename.endswith(".gz"):
        return gzip.open(filename)
    if filename.endswith(".bz2"):
        return bz2.BZ2File(filename)
    return open(filename)


def main(argv):
    """Add the downloads found in an Apache access log to the database.

    argv must be [program, configfile, logfile].  The config file supplies
    the database name/user/password ([database] section) and the files URL
    ([webui] files_url), whose path component is the prefix a request path
    must start with to be counted.

    Every log line matched by logre under that prefix, and not older than
    the 'http' row of the timestamps table, adds one to the downloads
    column of the release_files row whose filename equals the basename of
    the requested path.  Requests for files without such a row are ignored.
    If at least one download was counted, the new totals and the newest
    timestamp examined are written back and committed; otherwise nothing
    is written.

    Raises SystemExit (after printing a usage message) when argv does not
    have exactly three items.
    """
    if len(argv) != 3:
        # Parenthesised single argument prints the same text on Python 2 and 3
        print("Usage: apache_count.py configfile logfile")
        raise SystemExit
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(argv[1])
    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]
    # Setup database connection
    dbname = p.get('database', 'name')
    dbuser = p.get('database', 'user')
    dbpass = p.get('database', 'password')
    dbconn = psycopg.connect(database=dbname, user=dbuser, password=dbpass)
    cursor = dbconn.cursor()

    # filename -> download count (value read from the database + new hits)
    downloads = {}
    # Newest timestamp among the entries accepted in this run
    newest = None

    logfile = _open_log(argv[2])
    # try/finally (rather than "with") so the handle is released even on
    # interpreters where gzip/bz2 file objects are not context managers.
    try:
        cursor.execute("select value from timestamps where name='http'")
        last_http = cursor.fetchone()[0]

        for line in logfile:
            m = logre.search(line)
            if not m:
                continue
            path = m.group('path')
            if not path.startswith(files_url):
                continue
            date = DateTime(int(m.group('year')),
                            month_index[m.group('month').lower()],
                            int(m.group('day')),
                            int(m.group('hour')),
                            int(m.group('min')),
                            int(m.group('sec')))
            # Remove the logged zone offset so entries compare against the
            # stored timestamp independent of the server's zone
            # (presumably the stored value is UTC -- confirm).
            date = date - utc_offset(m.group('zone'))

            if date < last_http:
                continue
            # Remember the newest accepted entry.  Using whichever line was
            # parsed last (possibly one that was just skipped as too old)
            # could move the stored timestamp backwards and make the next
            # run count the same requests again.
            if newest is None or date > newest:
                newest = date

            filename = os.path.basename(path)
            # see if we have already read the old download count
            if filename not in downloads:
                cursor.execute("select downloads from release_files "
                               "where filename=%s", (filename,))
                record = cursor.fetchone()
                if not record:
                    # No file entry. Could be a .sig file
                    continue
                # make sure we're working with a number
                downloads[filename] = record[0] or 0
            # add a download
            downloads[filename] += 1
    finally:
        logfile.close()

    if not downloads:
        return

    # Update the download counts
    for filename, count in downloads.items():
        cursor.execute("update release_files set downloads=%s "
                       "where filename=%s", (count, filename))
    # Update the download timestamp (newest is set whenever downloads is
    # non-empty, because a count is only added after newest was updated)
    cursor.execute("update timestamps set value=%s "
                   "where name='http'", (psycopg.TimestampFromMx(newest),))
    dbconn.commit()

if __name__ == '__main__':
    # Executed as a script: pass the full command line (program name
    # included) to main().
    main(sys.argv)