summaryrefslogtreecommitdiff
path: root/scripts/make_errorcodes.py
blob: d2842932edf1769ede946d568cb43443fc6121e6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python
"""Generate the errorcodes module starting from PostgreSQL documentation.

The script can be run at a new PostgreSQL release to refresh the module.
"""

# Copyright (C) 2010 Daniele Varrazzo  <daniele.varrazzo@gmail.com>
#
# psycopg2 is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# psycopg2 is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
# License for more details.

import sys
import urllib2
from collections import defaultdict

from BeautifulSoup import BeautifulSoup as BS

def main():
    """Regenerate the autogenerated part of the errorcodes module.

    The script takes exactly one command line argument: the path of the
    errorcodes module to refresh.  The head of the file, up to and including
    the "# autogenerated" marker line, is kept; everything after it is
    replaced with data generated from the PostgreSQL documentation.

    Return 2 if the command line is wrong, None otherwise.
    """
    if len(sys.argv) != 2:
        print >>sys.stderr, "usage: %s /path/to/errorcodes.py" % sys.argv[0]
        return 2

    filename = sys.argv[1]

    file_start = read_base_file(filename)
    classes, errors = fetch_errors(
        ['8.1', '8.2', '8.3', '8.4', '9.0', '9.1', '9.2'])

    # Build the whole output before reopening the file for writing: opening
    # with "w" truncates it, so a failure while generating the data must not
    # leave a half-written module behind.
    lines = list(file_start)
    lines.extend(generate_module_data(classes, errors))

    f = open(filename, "w")
    try:
        for line in lines:
            print >>f, line
    finally:
        # make sure the data is flushed and the handle released
        f.close()

def read_base_file(filename):
    """Return the hand-written head of the file `filename`.

    The result is the list of the file lines, without trailing newline, from
    the start of the file up to and including the first line starting with
    "# autogenerated".

    Raise ValueError if no such line is found in the file.
    """
    rv = []
    f = open(filename)
    try:
        for line in f:
            rv.append(line.rstrip("\n"))
            if line.startswith("# autogenerated"):
                return rv
    finally:
        # close the file on both the return and the error path
        f.close()

    raise ValueError("can't find the separator. Is this the right file?")

def parse_errors(url):
    """Parse the error codes table from the documentation page at `url`.

    Return a tuple (classes, errors) where:

    - `classes` maps each two-character class code to the class label found
      in the page (a string starting with "Class");
    - `errors` maps each class code to a dict mapping the five-character
      error codes of that class to their labels.

    The page layout is verified with assertions: an AssertionError means the
    table doesn't have the expected structure.
    """
    response = urllib2.urlopen(url)
    try:
        page = BS(response)
    finally:
        # don't leave the HTTP connection open once the page is parsed
        response.close()

    # the data is in the body of the second table of the page
    table = page('table')[1]('tbody')[0]

    classes = {}
    errors = defaultdict(dict)

    # class of the error rows being read: None until a class row is found
    class_ = None

    for tr in table('tr'):
        if tr.td.get('colspan'): # it's a class
            # join the text fragments of the row, collapse the whitespace
            # and replace the em dash to obtain an ascii label
            label = ' '.join(' '.join(tr(text=True)).split()) \
                .replace(u'\u2014', '-').encode('ascii')
            assert label.startswith('Class')
            class_ = label.split()[1]
            assert len(class_) == 2
            classes[class_] = label

        else: # it's an error
            # an error row must follow the row of the class it belongs to
            assert class_ is not None, tr

            errcode = tr.tt.string.encode("ascii")
            assert len(errcode) == 5

            tds = tr('td')
            if len(tds) == 3:
                errlabel = '_'.join(tds[1].string.split()).encode('ascii')

                # double check the columns are equal
                cond_name = tds[2].string.strip().upper().encode("ascii")
                assert errlabel == cond_name, tr

            elif len(tds) == 2:
                # found in PG 9.1 docs
                errlabel = tds[1].tt.string.upper().encode("ascii")

            else:
                assert False, tr

            errors[class_][errcode] = errlabel

    return classes, errors

# Template of the URL of the PostgreSQL error codes documentation page: the
# placeholder is replaced by the documentation version (e.g. '9.1').
errors_url="http://www.postgresql.org/docs/%s/static/errcodes-appendix.html"

def fetch_errors(versions):
    """Collect the error classes and codes of several PostgreSQL versions.

    The documentation page of each version in `versions` is parsed in the
    order given and the results are merged together: an entry found in a
    later version replaces the one with the same code found in an earlier
    version.

    Return a tuple (classes, errors) with the same structure returned by
    parse_errors().
    """
    all_classes = {}
    all_errors = defaultdict(dict)

    for pg_version in versions:
        page_url = errors_url % pg_version
        page_classes, page_errors = parse_errors(page_url)

        all_classes.update(page_classes)
        for class_code in page_errors:
            all_errors[class_code].update(page_errors[class_code])

    return all_classes, all_errors

def generate_module_data(classes, errors):
    """Generate the lines of the autogenerated part of the module.

    `classes` maps the class codes to their labels, `errors` maps the class
    codes to a dict error code -> error label.

    The lines generated are, in order: a "CLASS_xxx = code" constant for
    each class, then, for each class, a comment with the class label followed
    by a "LABEL = code" constant for each of its errors. Classes and errors
    are sorted by code.
    """
    yield ""
    yield "# Error classes"

    ordered_classes = sorted(classes.items())

    for code, label in ordered_classes:
        # the constant name comes from the description after the " - ",
        # dropping any parenthesized note at the end
        description = label.split(" - ")[1]
        description = description.split("(")[0].strip()
        const_name = description.replace(" ", "_").replace('/', "_").upper()
        yield "CLASS_%s = %r" % (const_name, code)

    for code, label in ordered_classes:
        yield ""
        yield "# %s" % label

        class_errors = errors[code]
        for sqlstate in sorted(class_errors):
            yield "%s = %r" % (class_errors[sqlstate], sqlstate)

if __name__ == '__main__':
    sys.exit(main())