Python script to strip out bounced emails from a mailing list

Posted by Joel Jensen Mon, 24 Jul 2006 18:54:51 GMT

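The title says it all: the script below scans a bounce file for email addresses and then clears the spam flags on the matching rows of the orders table. Here it is: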

import re, MySQLdb,time

def grab_email(files=None):
    """Return the unique email addresses found in the given text files.

    Each file is read line by line and every substring that looks like an
    email address (according to a deliberately simple pattern) is collected.
    The pattern supports most common addresses, but not every valid one.

    Args:
        files: iterable of paths to text files. ``None`` or an empty
            iterable yields an empty result.

    Returns:
        A list of the distinct addresses, in the order they were first
        seen. Matching is case-sensitive, so ``Foo@x.com`` and
        ``foo@x.com`` are reported as two separate entries.

    Raises:
        IOError/OSError: if one of the files cannot be opened or read.
    """
    if not files:
        return []

    # The dot in front of the top-level domain is escaped on purpose: an
    # unescaped "." matches any character and lets strings without a real
    # domain separator (e.g. "user@hostXcom") through.
    mailsrch = re.compile(r'[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,5}')

    seen = set()
    unique = []
    for path in files:
        # "with" guarantees the handle is closed even if reading fails.
        with open(path, 'r') as handle:
            for line in handle:
                for address in mailsrch.findall(line):
                    if address not in seen:
                        seen.add(address)
                        unique.append(address)

    # A real list (not a dict view), so callers may index or slice it.
    return unique

def chunk_array(inlist, size):
    """Split ``inlist`` into consecutive chunks of at most ``size`` items.

    Used to break the email list into bite-size pieces.

    Args:
        inlist: sequence to split. It is left unmodified. ``None`` or an
            empty sequence yields an empty result.
        size: maximum number of items per chunk; must be at least 1.

    Returns:
        A list of slices of ``inlist``. Every chunk has ``size`` items
        except possibly the last, which holds the remainder.

    Raises:
        ValueError: if ``size`` is smaller than 1 and there is something
            to split (a non-positive size could never make progress).
    """
    if not inlist:
        return []
    if size < 1:
        raise ValueError('size must be at least 1, got %r' % (size,))

    # Slice by offset instead of repeatedly deleting from the front: this
    # keeps the caller's list intact and avoids shifting the remaining
    # elements on every iteration.
    return [inlist[start:start + size]
            for start in range(0, len(inlist), size)]

# Collect every address mentioned in the bounce file.
results = grab_email(['bouncefile.txt'])

# executemany() takes a sequence of parameter tuples, so wrap each
# lower-cased address in a 1-tuple, then split the list into small batches
# so that each batch is committed separately.
results = [(x.lower(),) for x in results]
chunk_size = 5
results = chunk_array(results, chunk_size)


# NOTE(review): host, user, password and database are placeholders; replace
# them (ideally from configuration rather than source) before running.
db = MySQLdb.connect(host="yourhost",user="root",passwd="yourpass",db="yourdb",port=3306)
try:
    c = db.cursor()
    start = time.time()
    print('connected')
    i = 0
    for result in results:
        c.executemany('''update orders set emailBillingSpam  = 0 where lower(emailBilling)  = %s''', result)
        c.executemany('''update orders set emailShippingSpam = 0 where lower(emailShipping) = %s''', result)
        db.commit()

        # Count what was actually processed: the last batch may hold fewer
        # than chunk_size addresses.
        i += len(result)
        now = time.time()
        elapsed = now - start
        # A coarse clock can report zero elapsed time for the first batch;
        # avoid dividing by zero in that case.
        rate = i / elapsed if elapsed > 0 else 0.0
        # Single-argument print() gives the same output on Python 2 and 3.
        print('%s %s requests per second' % (i, rate))
finally:
    # Always release the connection, even when an update fails part-way.
    db.close()