#!/usr/bin/env python
#
# mailstats.py - collect & report postscreen(8) statistics
# Sahil Tandon

import argparse
import bz2
import re
import sys
from collections import defaultdict

# Initialize the argument parser & specify arguments:
#   FILE  path to the maillog to analyse (required)
#   -a    only report on clients that triggered this event
#   -w    zero or more additional DNS whitelist zones
parser = argparse.ArgumentParser()
parser.add_argument('filename', metavar='FILE', help='path to maillog')
parser.add_argument('-a', dest='action',
                    help='filter results based on this action',
                    choices=['DNSBL', 'PREGREET'])
parser.add_argument('-w', dest='userwl', metavar='WL', nargs='*', default=[],
                    help='DNS whitelists')
args = parser.parse_args()

# Initialize DNS black- and whitelists; the latter include two defaults
# as well as user-defined zones.  Blacklists are populated by the parse()
# function as it steps through the log lines.
#
# IMPORTANT: dnsblog(8) entries with a zone not included in a whitelist
# are assumed to be blacklist hits.
dnsbls = []
dnswls = set(['list.dnswl.org', 'swl.spamhaus.org'] + args.userwl)

# DRY: define common parts of the compiled regular expressions.  These are
# raw strings so that backslash sequences such as \[ and \d reach the regex
# engine untouched instead of being treated as (invalid) string escapes.
conn_re = r']: CONNECT'
addr_re = r'.*\[([^]]+)\]:\d+'      # group 1 captures the bracketed address
rcpt_re = r': reject: RCPT from'

# pattern_list is used to grow pattern_dict, a dictionary populated
# with keys that correspond to postscreen(8) events, and their regex
# values.
pattern_list = [
    'BARE NEWLINE', 'BLACKLISTED', 'COMMAND COUNT LIMIT',
    'COMMAND TIME LIMIT', 'COMMAND PIPELINING', 'DISCONNECT', 'DNSBL',
    'HANGUP', 'NON-SMTP', 'PASS NEW', 'PASS OLD', 'PREGREET', 'WHITELISTED',
]

# Events whose log lines do not follow the simple ": EVENT ... [addr]:port"
# layout get hand-written patterns.
pattern_dict = {
    'CONNECT':
        re.compile(r'%s%s ' % (conn_re, addr_re)),
    'reject (too many connections)':
        re.compile(r'%s%s: too many' % (conn_re, addr_re)),
    'reject (all server ports busy)':
        re.compile(r'%s%s: all server' % (conn_re, addr_re)),
    'reject (450)':
        re.compile(r'%s%s: 450' % (rcpt_re, addr_re)),
    'reject (550)':
        re.compile(r'%s%s: 550' % (rcpt_re, addr_re)),
}

# Every remaining event shares one pattern shape: the event name followed
# somewhere later on the line by the client's "[addr]:port".
for pattern in pattern_list:
    pattern_dict[pattern] = re.compile(r': %s%s' % (pattern, addr_re))

# Initialize the dictionaries that accumulate the statistics.
# addr_dict maps a client address to a dictionary of per-key values: event
# counts for postscreen(8) events, and a 1 flag for each DNS zone that
# lists the address.  rank_dict maps a DNSBL rank (kept as the string
# found in the log) to the number of times that rank was logged.
addr_dict = defaultdict(lambda: defaultdict(int))
rank_dict = defaultdict(int)

# Patterns applied to every matching log line; compiled once up front.
# Example line for rank_re:
# Jan 31 22:37:10 cricket postfix/postscreen[35464]: DNSBL rank 7 for [180.253.3.203]:18541
rank_re = re.compile(r'rank (\d+) for \[[^]]+\]:\d+')
# The address is any run of non-blanks (so IPv6 clients are not dropped)
# and zone names may contain hyphens, e.g. dnsbl-1.uceprotect.net.
dnsblog_re = re.compile(r': addr (\S+) listed by domain ([\w.-]+) as')


def parse(filename):
    """Collect postscreen(8) and dnsblog(8) statistics from a maillog.

    Iterates through each line of the file at *filename*:

    * postfix/postscreen lines increment, in addr_dict, the counter of the
      first pattern_dict event whose regex matches; independently, a
      "rank N for [addr]:port" entry increments rank_dict[N].
    * postfix/dnsblog lines flag the address as listed by the zone, and
      any zone that is not a known whitelist is recorded in dnsbls.

    All other lines are ignored.  Results are accumulated in the
    module-level addr_dict, rank_dict and dnsbls; nothing is returned.
    """
    with open(filename) as log:
        for line in log:
            if 'postfix/postscreen' in line:
                # A line is attributed to at most one event.
                for event, regex in pattern_dict.items():
                    match = regex.search(line)
                    if match:
                        addr_dict[match.group(1)][event] += 1
                        break
                match = rank_re.search(line)
                if match:
                    rank_dict[match.group(1)] += 1
            elif 'postfix/dnsblog' in line:
                match = dnsblog_re.search(line)
                if not match:
                    continue
                addr, zone = match.groups()
                addr_dict[addr][zone] = 1
                # Zones that are not whitelists are assumed to be blacklists.
                if zone not in dnswls and zone not in dnsbls:
                    dnsbls.append(zone)


def prune(ip, event):
    """Return True if *ip* should be kept when filtering on *event*.

    A filter used to prune the IP list based on the presence of a
    particular event, e.g. a DNSBL hit.  If no event is specified, every
    address is kept.  Lookups go through .get() so that asking about an
    unknown address or event does not insert it into addr_dict.
    """
    if not event:
        return True
    return addr_dict.get(ip, {}).get(event, 0) > 0


def event_report(filt=None):
    """Print the per-event table, optionally restricted by *filt*.

    TOTAL refers to the number of times each event was triggered, while
    UNIQ corresponds to the number of unique IPs that triggered the event.
    When *filt* names an event, only addresses that triggered that event
    are considered.  Events with a TOTAL of zero are not printed.
    """
    align = '{0:>8}{1}{2:<8}{3:<}'
    # The filter does not depend on the event being reported: apply it once.
    selected = [ip for ip in addr_dict if prune(ip, filt)]
    # print("") emits a bare newline on both Python 2 and Python 3.
    print("")
    print(align.format("UNIQ", "/", "TOTAL", "EVENT"))
    for event in sorted(pattern_dict):
        counts = [addr_dict[ip].get(event, 0) for ip in selected]
        total = sum(counts)
        if total == 0:
            continue
        uniq = sum(1 for count in counts if count)
        print(align.format(uniq, "/", total, event))
    print("")

# In the context of DNS{B,W}Ls, TOTAL corresponds to the number of IPs listed
# in a zone, while UNIQ is the number of IPs that are listed *only* by
# that zone. Also report the number of overlap among DNS{B,W}L hits.
def dns_report(zones):
    """Print per-zone hit statistics for the DNS lists in *zones*.

    *zones* is either the blacklist or the whitelist collection; it is
    treated as the whitelist when it contains 'list.dnswl.org'.  For each
    zone, in sorted order, one row is printed with:

    * TOTAL - the number of IPs listed in the zone;
    * UNIQ  - the number of those IPs listed by exactly one zone of the
      same kind, i.e. *only* by this zone;
    * the last column - the number of those IPs that are also listed by
      at least one zone of the opposite kind (overlap).
    """
    align = '{0:>8}{1}{2:<8}{3:<24}{4:>6}'
    if 'list.dnswl.org' in zones:
        same_label, other_label = 'DNSWL', 'DNSBL'
        same_lists, other_lists = dnswls, dnsbls
    else:
        same_label, other_label = 'DNSBL', 'DNSWL'
        same_lists, other_lists = dnsbls, dnswls

    print(align.format("UNIQ", "/", "TOTAL", same_label, other_label))
    for zone in sorted(zones):
        hits = excl = olap = 0
        # One pass over the clients; .get() keeps the report from adding
        # zero-valued keys to the per-address dictionaries.
        for flags in addr_dict.values():
            if not flags.get(zone):
                continue
            hits += 1
            if sum(flags.get(x, 0) for x in same_lists) == 1:
                excl += 1
            if sum(flags.get(x, 0) for x in other_lists) != 0:
                olap += 1
        print(align.format(excl, "/", hits, zone, olap))
    # print("") emits a bare newline on both Python 2 and Python 3.
    print("")


# Main: parse the log, then print the event table, the blacklist and
# whitelist tables, and finally the DNSBL ranks ordered by frequency.
parse(args.filename)
event_report(args.action)
dns_report(dnsbls)
dns_report(dnswls)
for rank in sorted(rank_dict, key=rank_dict.get, reverse=True):
    print('{0:>11} {1:>2} : {2}'.format("DNSBL Rank", rank, rank_dict[rank]))
print("")