| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
 | #!/usr/bin/env python3
"""Classes to parse mailer-daemon messages."""
import calendar
import email.message
import re
import os
import sys
class Unparseable(Exception):
    pass
class ErrorMessage(email.message.Message):
    def __init__(self):
        email.message.Message.__init__(self)
        self.sub = ''
    def is_warning(self):
        sub = self.get('Subject')
        if not sub:
            return 0
        sub = sub.lower()
        if sub.startswith('waiting mail'):
            return 1
        if 'warning' in sub:
            return 1
        self.sub = sub
        return 0
    def get_errors(self):
        for p in EMPARSERS:
            self.rewindbody()
            try:
                return p(self.fp, self.sub)
            except Unparseable:
                pass
        raise Unparseable
# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address.  The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's.  The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses.  The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# compile the re's in the list and store them in-place.
for i in range(len(emparse_list_list)):
    x = emparse_list_list[i]
    if isinstance(x, str):
        x = re.compile(x, re.MULTILINE)
    else:
        xl = []
        for x in x:
            xl.append(re.compile(x, re.MULTILINE))
        x = tuple(xl)
        del xl
    emparse_list_list[i] = x
    del x
del i
# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order.  After the first match,
# no more expressions are searched for.  So, order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    data = fp.read()
    res = emparse_list_from.search(data)
    if res is None:
        from_index = len(data)
    else:
        from_index = res.start(0)
    errors = []
    emails = []
    reason = None
    for regexp in emparse_list_list:
        if isinstance(regexp, tuple):
            res = regexp[0].search(data, 0, from_index)
            if res is not None:
                try:
                    reason = res.group('reason')
                except IndexError:
                    pass
                while 1:
                    res = regexp[1].match(data, res.end(0), from_index)
                    if res is None:
                        break
                    emails.append(res.group('email'))
                break
        else:
            res = regexp.search(data, 0, from_index)
            if res is not None:
                emails.append(res.group('email'))
                try:
                    reason = res.group('reason')
                except IndexError:
                    pass
                break
    if not emails:
        raise Unparseable
    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if isinstance(regexp, str):
                for i in range(len(emails)-1,-1,-1):
                    email = emails[i]
                    exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
                    res = exp.search(data)
                    if res is not None:
                        errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
                        del emails[i]
                continue
            res = regexp.search(data)
            if res is not None:
                reason = res.group('reason')
                break
    for email in emails:
        errors.append(' '.join((email.strip()+': '+reason).split()))
    return errors
EMPARSERS = [emparse_list]
def sort_numeric(a, b):
    a = int(a)
    b = int(b)
    if a < b:
        return -1
    elif a > b:
        return 1
    else:
        return 0
def parsedir(dir, modify):
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0
    # find all numeric file names and sort them
    files = list(filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')))
    files.sort(sort_numeric)
    for fn in files:
        # Lets try to parse the file.
        fp = open(fn)
        m = email.message_from_file(fp, _class=ErrorMessage)
        sender = m.getaddr('From')
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')
        if m.is_warning():
            fp.close()
            print('warning only')
            nwarn = nwarn + 1
            if modify:
                os.rename(fn, ','+fn)
##              os.unlink(fn)
            continue
        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad = nbad + 1
            fp.close()
            continue
        print(len(errors), 'errors')
        # Remember them
        for e in errors:
            try:
                mm, dd = m.getdate('date')[1:1+2]
                date = '%s %02d' % (calendar.month_abbr[mm], dd)
            except:
                date = '??????'
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = '%s (%s)' % (fn, date)
            else:
                errordict[e] = errordict[e] + 1
            errorlast[e] = '%s (%s)' % (fn, date)
        fp.close()
        nok = nok + 1
        if modify:
            os.rename(fn, ','+fn)
##          os.unlink(fn)
    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    list = []
    for e in errordict.keys():
        list.append((errordict[e], errorfirst[e], errorlast[e], e))
    list.sort()
    for num, first, last, e in list:
        print('%d %s - %s\t%s' % (num, first, last, e))
def main():
    modify = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        modify = 1
        del sys.argv[1]
    if len(sys.argv) > 1:
        for folder in sys.argv[1:]:
            parsedir(folder, modify)
    else:
        parsedir('/ufs/jack/Mail/errorsinbox', modify)
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
 |