@duceduc Last update… Tested intensively on GMAIL and a little bit on Yahoo…
IMPORTANT: New arguments have been added to support Yahoo Mail and to provide a test mode (no deletion), so more arguments are now required!
More information is also displayed now, such as the Date, Subject and Sender of each Email identified as older than x days…
I tried to take into account all the problems I had with special characters in the subject, sender email and date… I also added more checks on the parameters passed to the program.
A summary has been added at the end of the execution to tell you how many mails could/have been deleted out of x number of mails in the folder…
Let me know if it is working for you and if you are experiencing any additional bugs or problems…
The code:
#!/bin/python3
#====================================================================================================================================
#
# program to delete Emails in GMAIL/YAHOO older than x days in specific folders
# Arguments:
# Arg1 (required): Username to access the mailbox (example: username@gmail.com)
# Arg2 (required): Password to access the mailbox (example: my_password)
# Recommendation: create an application password in GMAIL/YAHOO to avoid some login problems (denial) when connecting to the mailbox
# Arg3 (required): Days to keep (example: 10)
# Arg4 (required): G for Gmail or Y for Yahoo Mail
# Arg5 (required): T for test (no delete) or D for deletion (delete Emails older than x days (see Arg3))
# Arg6 (optional): List of folder names to clean (if no folder is provided, the default folder is "INBOX" for GMAIL and "Inbox" for Yahoo) (example: INBOX Protection Commercial)
# Example of commands (the first one with 3 folders to cleanup in Test Mode (no delete), the second one will cleanup "Inbox" only with Delete):
# empty_mailbox.py username@gmail.com my_password 10 G T INBOX Protection Commercial
# empty_mailbox.py username@yahoo.com my_password 10 Y D
#
# IMPORTANT: tested intensively on GMAIL and a little bit on Yahoo Mail... Use it at your own risk
#
#====================================================================================================================================
import datetime
import imaplib, email
from email.header import decode_header
import sys
#====================================================================================================================================
#
# create function to delete mail in a mailbox folder older than x days
# arguments: folder name, number of days to keep, mode (T for test / no delete, D for deletion)
#
#====================================================================================================================================
def _search_message_ids(criteria):
    """Return the message sequence numbers matching *criteria* in the selected folder.

    A failed or empty SEARCH yields an empty list, so callers can use plain
    truthiness instead of probing for an empty first element.
    """
    typ, data = imap.search(None, criteria)
    if typ != 'OK' or not data or not data[0]:
        return []
    return data[0].split()


def delete_email_in_folder(folder,days,test_prod):
    """Flag (and, in delete mode, expunge) the Emails of *folder* older than *days* days.

    Arguments:
        folder:    mailbox name as reported by the server (without surrounding quotes)
        days:      number of days to keep; messages dated before today - days are selected
        test_prod: 'D' to really delete, anything else only lists the candidates

    Uses the module-level ``imap`` connection and appends one line per folder to
    the module-level ``summary`` string. Returns nothing.
    """
    global summary
    print("Deleting Emails in",folder,"older than",days,"days ...")

    # Mailbox names containing spaces or other special characters must be sent
    # as a quoted string, otherwise the SELECT command is malformed.
    mailbox = folder
    already_quoted = len(mailbox) >= 2 and mailbox.startswith('"') and mailbox.endswith('"')
    if not already_quoted:
        mailbox = '"' + mailbox.replace('\\', '\\\\').replace('"', '\\"') + '"'
    typ, _ = imap.select(mailbox)
    if typ != 'OK':
        summary = (summary + " error: unable to select folder " + folder + "\n")
        print(" Error ! Unable to select folder", folder)
        return

    # number of mails in the folder
    tot_email = len(_search_message_ids('ALL'))
    if tot_email:
        print (" Number of Email(s) in folder",folder,":",tot_email)
    else:
        print (" Folder",folder,"is empty")

    # list of mails to delete based on the number of days to keep
    before_date = (datetime.date.today() - datetime.timedelta(days)).strftime("%d-%b-%Y") # date string, 04-Jan-2013
    messages = _search_message_ids('(BEFORE {0})'.format(before_date))
    count = len(messages)
    if not count:
        summary = (summary + " No mail deleted in folder " + folder + " out of a total of " + (str(tot_email)) + " Email(s) \n")
        print(" No mail to delete ...")
        return

    # storing information in summary variable (global)
    total_text = " out of a total of " + (str(tot_email)) + " Email(s) \n"
    if (test_prod == 'D'):
        deleted_text = " mail deleted in folder " if count == 1 else " mail(s) deleted in folder "
        summary = (summary + " " + (str(count)) + deleted_text + folder + total_text)
    else:
        summary = (summary + " TEST !!! " + (str(count)) + " mail(s) could be deleted in folder " + folder + total_text)

    # Work from the highest sequence number down, as the original loop did:
    # removing a higher-numbered message never renumbers the ones still pending.
    for remaining, message_id in zip(range(count, 0, -1), reversed(messages)):
        print(">>>",((datetime.datetime.today()).strftime("%Y-%b-%d %H:%M:%S"))," ",remaining, "mail(s) remaining to delete")
        # Only the headers are needed for the report; BODY.PEEK avoids both
        # downloading the whole message and setting the \Seen flag, so test
        # mode leaves the mailbox untouched.
        subject = From = Date = ""
        res, fetched = imap.fetch(message_id, "(BODY.PEEK[HEADER])")
        if res == 'OK':
            for response in fetched:
                if isinstance(response, tuple):
                    try:
                        subject, From, Date = obtain_header(email.message_from_bytes(response[1]))
                    except (TypeError, ValueError, LookupError) as err:
                        # The header display is informational only; the selection was
                        # made by the server-side search, so keep going.
                        print(" Warning ! Unable to decode the Email headers:", err)
        # mark the mail as deleted
        if (test_prod == 'D'):
            imap.store(message_id, "+FLAGS", "\\Deleted")
        else:
            print(" TEST !!! Execution in test mode ... No deletion ...")
        # Printing Date, Subject and From information
        print(" Date :", Date)
        print(" Subject:", subject)
        print(" From :", From)

    # Expunge while this folder is still selected: selecting another folder
    # afterwards does not remove the messages flagged here.
    if (test_prod == 'D'):
        imap.expunge()
#====================================================================================================================================
#
# create function to extract subject, "from" email address information and the date of the current Email to delete
# arguments: mail message
# Return: the subject, the From information and the date of the Email (the date the Email was sent... not the date the Email was received !)
#
#====================================================================================================================================
def _decode_mime_words(raw_value):
    """Return *raw_value* (a header value, possibly RFC 2047 encoded) as text.

    Each chunk is decoded with its own declared charset. A missing header gives
    an empty string; an unknown charset or undecodable bytes fall back to UTF-8
    with replacement characters instead of raising.
    """
    from email.errors import HeaderParseError

    if raw_value is None:
        return ""
    try:
        chunks = decode_header(raw_value)
    except HeaderParseError:
        # malformed encoded-word: show the raw header rather than failing
        return str(raw_value)
    pieces = []
    for chunk, charset in chunks:
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode(charset or "utf-8")
            except (LookupError, UnicodeDecodeError):
                chunk = chunk.decode("utf-8", errors="replace")
        pieces.append(chunk)
    return "".join(pieces)


def obtain_header(msg):
    """Extract the subject, the sender and the sent date of an Email.

    Argument:
        msg: mail message (as built by the ``email`` package)

    Return: the tuple (subject, From, Date). Subject and From are decoded to
    text; Date is the date the Email was sent (not the date it was received),
    converted to the local timezone and formatted as
    "%a, %d %b %Y %H:%M:%S %z". A missing header yields an empty string and a
    date that cannot be parsed is returned as found in the message.
    """
    from email.utils import parsedate_to_datetime

    subject_ret = _decode_mime_words(msg["Subject"])
    From_ret = _decode_mime_words(msg["From"])

    Date = _decode_mime_words(msg.get("Date")).strip()
    if Date:
        try:
            # parsedate_to_datetime copes with trailing comments such as "(UTC)",
            # named zones such as GMT/EST and an optional day name.
            sent = parsedate_to_datetime(Date)
            # convert it to the local timezone
            Date = sent.astimezone().strftime("%a, %d %b %Y %H:%M:%S %z")
        except (TypeError, ValueError, OverflowError, OSError):
            # unparsable date: keep the raw header text for display
            pass
    return subject_ret, From_ret, Date
#====================================================================================================================================
#
# Main Program
#
#====================================================================================================================================
print (">>>",(datetime.datetime.today()).strftime("%Y-%b-%d %H:%M:%S"))
# read arguments: username, password, number of days, mailbox type and mode
# have to be provided, folders are optional
arg = sys.argv
# not enough arguments, error message and exit
# (five values are read below, so the script name plus five arguments are needed)
if (len(arg) < 6):
    print("Error ! not enough arguments: at least 5 are required.")
    print(" User, password, number of days, G (for Gmail) or Y (for Yahoo Mail) and T (for Test) or D (for Deletion) are mandatory.")
    sys.exit(1)
# store arguments in variables
my_email = sys.argv[1]
app_generated_password = sys.argv[2]
try:
    days = int(sys.argv[3])
except ValueError:
    print("Error ! Number of days (3rd argument) must be a whole number")
    sys.exit(1)
# a negative value would move the cut-off date into the future and select every mail
if (days < 0):
    print("Error ! Number of days (3rd argument) must not be negative")
    sys.exit(1)
mail_ident = sys.argv[4]
test_prod = sys.argv[5]
folder = sys.argv[6:]
summary = ""
# error: argument 4 is not G or Y
if (mail_ident not in ("G", "Y")):
    print("Error ! Mailbox (4th argument) must be G (for Gmail) ou Y for (Yahoo Mail)")
    sys.exit(1)
# error: argument 5 is not D or T
if (test_prod not in ("D", "T")):
    print("Error ! Delete mode (5th argument) must be T ou D")
    sys.exit(1)
# initialize IMAP object for Mailbox
if (mail_ident == "G"):
    imap = imaplib.IMAP4_SSL("imap.gmail.com")
    mailbox_name = "GMAIL"
    # if no folder has been provided, "INBOX" is the default for GMAIL
    default_folder = "INBOX"
else:
    imap = imaplib.IMAP4_SSL("imap.mail.yahoo.com")
    mailbox_name = "YAHOO Mail"
    # if no folder has been provided, "Inbox" is the default for Yahoo Mail
    default_folder = "Inbox"
if (not folder):
    folder = [default_folder]
print("Connecting to",mailbox_name,"...")
try:
    # login to mailbox with credentials
    try:
        imap.login(my_email, app_generated_password)
    except imaplib.IMAP4.error as err:
        print("Error ! Login failed:", err)
        sys.exit(1)
    print(mailbox_name,"connected ...\n")
    # retrieving list of folders
    print("List of various folders:")
    folder_list = imap.list()
    # error: not able to retrieve list of inboxes
    if (folder_list[0] != "OK"):
        print("Error ! Not able to retrieve list of Inboxes")
        sys.exit(1)
    folders = folder_list[1]
    # extract the folder names; each entry looks like: (flags) "/" "name"
    known_folders = set()
    for name in folders:
        if not isinstance(name, bytes):
            continue
        _, separator, tail = name.decode('utf-8').partition('/')
        if not separator:
            # entry without the "/" hierarchy delimiter: nothing to extract
            continue
        # tail starts with the closing quote of the delimiter and a space
        folder_name = tail[2:]
        if len(folder_name) >= 2 and folder_name.startswith('"') and folder_name.endswith('"'):
            folder_name = folder_name[1:-1]
        print (folder_name, end='\n')
        known_folders.add(folder_name)
    # loop to execute the cleanup on the folders provided as parameters
    # (processed from the last one to the first one, as before)
    loop = 0
    for current_folder in reversed(folder):
        print(" ")
        print(" ")
        # test if folder is included in the folder list
        if (current_folder in known_folders):
            # delete Emails in folder older than x days
            delete_email_in_folder(current_folder,days,test_prod)
            loop = 1
        else:
            summary = (summary + " error: "+ current_folder + " is not a valid folder name (see list above (case sensitive))\n")
            print("Error ! Unknown folder in the list of folders:",current_folder)
            print(" See above for the list, the folder name must be exactly the same (the folder name validation is case sensitive)")
    print(" ")
    print(" ")
    # delete all the selected messages
    if (loop == 1):
        if (test_prod == 'D'):
            try:
                imap.expunge()
            except imaplib.IMAP4.error as err:
                print("Error ! Expunge failed:", err)
            print("All selected mails have been deleted, summary:")
            print(summary)
        else:
            print("No Email deleted as execution is in TEST mode, summary:")
            print(summary)
        # close the mailbox; CLOSE is only legal while a folder is selected,
        # so it is skipped when no valid folder was processed
        try:
            imap.close()
        except imaplib.IMAP4.error as err:
            print("Error ! Not able to close the mailbox:", err)
    print("Closing connection with",mailbox_name,"...")
finally:
    # logout from the server, also when an error interrupted the cleanup
    imap.logout()
print("Connection closed with",mailbox_name,"...")
print (">>>",(datetime.datetime.today()).strftime("%Y-%b-%d %H:%M:%S"))