1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#!/usr/bin/python
import sys, os, re, psycopg, ConfigParser, urlparse, gzip, bz2
from mx.DateTime import DateTime
from mx.DateTime.Timezone import utc_offset
# Matches a successful (status 200) GET request in an Apache access-log line
# and captures the timestamp fields, the zone and the requested path.
logre = re.compile(
    r"\[(?P<day>..)/(?P<month>...)/(?P<year>....):"
    r"(?P<hour>..):(?P<min>..):(?P<sec>..) "
    # The zone must stop at the closing bracket (a greedy ".*" could run on
    # to a later '] "GET' in the same line), and the dot of the protocol
    # version is matched literally.
    r'(?P<zone>[^\]]*)\] "GET (?P<path>[^ "]+) HTTP/1\.\d" 200')

# Lower-case month abbreviations in calendar order.
month_names = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

# Maps a lower-case month abbreviation to its 1-based month number.
month_index = dict(zip(month_names, range(1, 13)))
def _open_log(log_path):
    """Open the log file, choosing gzip/bz2 decompression by file suffix."""
    if log_path.endswith(".gz"):
        return gzip.open(log_path)
    if log_path.endswith(".bz2"):
        return bz2.BZ2File(log_path)
    return open(log_path)


def _entry_time(match):
    """Return the time of a matched log line minus its logged zone offset."""
    local = DateTime(int(match.group('year')),
                     month_index[match.group('month').lower()],
                     int(match.group('day')),
                     int(match.group('hour')),
                     int(match.group('min')),
                     int(match.group('sec')))
    # Subtracting the zone offset puts every entry on one common time base,
    # so entries logged with different zones compare correctly.
    return local - utc_offset(match.group('zone'))


def main(argv):
    """Add the downloads found in an Apache access log to the database.

    argv -- [program name, config file path, log file path].  The log may
            be plain text or compressed (".gz" / ".bz2" suffix).

    The config file supplies 'files_url' in section [webui] (only its path
    component is used, as the prefix a request must start with) and
    'name', 'user' and 'password' in section [database].

    Log entries older than the 'http' row of the timestamps table are
    skipped.  For the remaining requests the download counter of the
    matching release_files row is incremented, and the 'http' timestamp is
    advanced to the newest entry seen.  Nothing is written when no download
    was counted.

    Raises SystemExit (non-zero exit status, usage text on stderr) when
    argv does not hold exactly three items.
    """
    if len(argv) != 3:
        # A string argument is printed to stderr and yields a failing exit
        # status; a bare SystemExit would report success to the caller.
        raise SystemExit("Usage: apache_count.py configfile logfile")

    # Read config file
    config = ConfigParser.ConfigParser()
    config.read(argv[1])

    # Server-relative URI prefix of the downloadable files
    files_url = urlparse.urlsplit(config.get('webui', 'files_url'))[2]

    # Setup database connection
    dbconn = psycopg.connect(database=config.get('database', 'name'),
                             user=config.get('database', 'user'),
                             password=config.get('database', 'password'))
    cursor = dbconn.cursor()

    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]

    downloads = {}   # file name -> new total download count
    latest = None    # newest entry time that was not skipped as too old

    log = _open_log(argv[2])
    try:
        for line in log:
            match = logre.search(line)
            if not match:
                continue
            path = match.group('path')
            if not path.startswith(files_url):
                continue

            when = _entry_time(match)
            # NOTE(review): entries stamped exactly last_http are counted
            # again if the same log is processed twice -- confirm intended.
            if when < last_http:
                continue
            # Remember the newest accepted entry rather than whichever line
            # happens to come last, so an out-of-order log cannot move the
            # stored timestamp backwards.
            if latest is None or when > latest:
                latest = when

            name = os.path.basename(path)
            # see if we have already read the old download count
            if name not in downloads:
                cursor.execute("select downloads from release_files "
                               "where filename=%s", (name,))
                record = cursor.fetchone()
                if not record:
                    # No file entry. Could be a .sig file
                    continue
                # make sure we're working with a number
                downloads[name] = record[0] or 0
            # add a download
            downloads[name] += 1
    finally:
        log.close()

    if not downloads:
        return

    # Update the download counts
    for name, count in downloads.items():
        cursor.execute("update release_files set downloads=%s "
                       "where filename=%s", (count, name))

    # Update the download timestamp
    cursor.execute("update timestamps set value=%s "
                   "where name='http'", (psycopg.TimestampFromMx(latest),))
    dbconn.commit()
# Run the counter only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv)
|