Python script to strip out bounced emails from a mailing list
The title says it all. Here it is:
def grab_email(files=None):
    """Return the unique email addresses found in a list of text files.

    Every file is read line by line and each line is searched with a
    simple address pattern. The pattern covers the common address shapes
    but not every valid one: the top-level domain is limited to 2-5
    ASCII letters.

    Args:
        files: Iterable of paths to text files. ``None`` or an empty
            list yields an empty result.

    Returns:
        A list of the distinct addresses, exactly as written in the
        files (comparison is case-sensitive), in order of first
        appearance.
    """
    if not files:
        return []

    # The dot in front of the top-level domain must be escaped; a bare
    # "." matches any character and would accept text such as
    # "user@hostXcom" as an address.
    mailsrch = re.compile(r'[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,5}')

    seen = set()
    unique = []
    for path in files:
        # "with" guarantees the handle is closed even if reading fails.
        with open(path, 'r') as handle:
            for line in handle:
                for address in mailsrch.findall(line):
                    if address not in seen:
                        seen.add(address)
                        unique.append(address)
    return unique
def chunk_array(inlist, size):
    """Split ``inlist`` into consecutive chunks of at most ``size`` items.

    The input sequence is left untouched; the chunks are new slices of it.

    Args:
        inlist: Sequence to split (anything that supports ``len`` and
            slicing).
        size: Maximum number of items per chunk; must be at least 1
            when ``inlist`` is non-empty.

    Returns:
        A list of slices of ``inlist``. Every chunk holds ``size`` items
        except possibly the last one. An empty input gives ``[]``.

    Raises:
        ValueError: If ``inlist`` is non-empty and ``size`` is less
            than 1 (no progress could ever be made with such a size).
    """
    if not inlist:
        return []
    if size < 1:
        raise ValueError('size must be a positive integer, got %r' % (size,))
    # Slice by offset instead of deleting from the front of the list:
    # this does not consume the caller's data and avoids shifting the
    # remaining elements on every step.
    return [inlist[start:start + size]
            for start in range(0, len(inlist), size)]
# Script body: read the bounced addresses from bouncefile.txt and clear the
# spam flags on every order whose billing or shipping email matches one.
#
# NOTE(review): this script relies on ``re`` (inside grab_email), ``time``
# and ``MySQLdb`` being imported; no import lines are visible in this part
# of the file -- confirm they exist.

# Number of addresses sent to the database per executemany()/commit round.
CHUNK_SIZE = 5

results = grab_email(['bouncefile.txt'])
# Lower-case the addresses to match the lower(...) comparison in the SQL.
# De-duplicate again afterwards, because addresses that differed only in
# case collapse to the same value. Each address is wrapped in a 1-tuple
# because executemany() expects one parameter sequence per row.
results = [(x,) for x in sorted(set(x.lower() for x in results))]
results = chunk_array(results, CHUNK_SIZE)

db = MySQLdb.connect(host="yourhost", user="root", passwd="yourpass",
                     db="yourdb", port=3306)
c = db.cursor()
try:
    start = time.time()
    print('connected')
    i = 0
    for result in results:
        c.executemany('''update orders set emailBillingSpam = 0 where lower(emailBilling) = %s''', result)
        c.executemany('''update orders set emailShippingSpam = 0 where lower(emailShipping) = %s''', result)
        # Commit after every chunk so progress is kept if a later chunk fails.
        db.commit()
        # Count what was actually sent; the last chunk may be shorter
        # than CHUNK_SIZE.
        i = i + len(result)
        now = time.time()
        elapsed = now - start
        # A coarse clock can report zero elapsed time for the first chunk;
        # avoid dividing by zero in that case.
        rate = i / elapsed if elapsed > 0 else 0.0
        # Single-argument print: same output under Python 2's print
        # statement and Python 3's print function.
        print('%s %s requests per second' % (i, rate))
finally:
    # Always release the cursor and the connection, even on failure.
    c.close()
    db.close()