Python pop3 email parser

Python Use Pop3 To Read Email Example

To receive an email, you can write an MUA (Mail User Agent) as the client and retrieve the email from the MDA (Mail Delivery Agent) to the user’s computer or mobile phone. The most commonly used protocol for receiving mail is POP (Post Office Protocol). Its current version number is 3, so it is commonly known as POP3. Python has a built-in poplib module, which implements the POP3 protocol and can be used to receive mail directly.

Note that the POP3 protocol does not receive the original readable message itself, but the encoded text of the message that SMTP sent. So in order to turn the text received by POP3 into a readable email, it is necessary to parse the original text with various classes provided by the email module and turn it into a readable email object. So there are two steps for you to receive email from a pop3 server in Python.

  1. Use poplib module to download the original text of the email to local.
  2. Use the email module to parse the original text into a readable mail object.

1. Download Email Via POP3 In Python.

Below python code will get the latest email content.

# import python poplib module
import poplib
# Parser turns the raw message text into a message object; it must be
# imported before it is used below.
from email.parser import Parser

# input email address, password and pop3 server domain or ip address
email = input('Email: ')
password = input('Password: ')
pop3_server = input('POP3 server: ')

# connect to pop3 server:
server = poplib.POP3(pop3_server)
try:
    # open debug switch to print debug information between client and pop3 server.
    server.set_debuglevel(1)

    # get pop3 server welcome message once, then print it out.
    pop3_server_welcome_msg = server.getwelcome().decode('utf-8')
    print(pop3_server_welcome_msg)

    # user account authentication
    server.user(email)
    server.pass_(password)

    # stat() function return email count and occupied disk size
    print('Messages: %s. Size: %s' % server.stat())

    # list() function return all email list
    resp, mails, octets = server.list()
    print(mails)

    # POP3 message numbers start at 1, so the newest email index number
    # equals the number of listed messages.
    index = len(mails)

    if index == 0:
        # retr(0) is not a valid request, so stop here for an empty mailbox.
        print('No messages on the server.')
    else:
        # server.retr function can get the contents of the email with index variable value index number.
        resp, lines, octets = server.retr(index)

        # lines stores each line of the original text of the message
        # so that you can get the original text of the entire message use the join function and lines variable.
        # errors='replace' keeps a non utf-8 message from aborting the script.
        msg_content = b'\r\n'.join(lines).decode('utf-8', errors='replace')

        # now parse out the email object.
        msg = Parser().parsestr(msg_content)

        # get email from, to, subject attribute value.
        # a missing header yields '' instead of None so the concatenation below is safe.
        email_from = msg.get('From', '')
        email_to = msg.get('To', '')
        email_subject = msg.get('Subject', '')
        print('From ' + email_from)
        print('To ' + email_to)
        print('Subject ' + email_subject)

        # delete the email from pop3 server directly by email index.
        # server.dele(index)
finally:
    # close pop3 server connection, even when a step above failed.
    server.quit()

2. Parse Email To Message Object.

# Modules needed to parse a downloaded email:
#   Parser        - builds a message object from the raw message text
#   decode_header - decodes encoded header values
#   parseaddr     - splits an address header into name and address
import poplib
from email.header import decode_header
from email.parser import Parser
from email.utils import parseaddr

# parse the email content to a message object.
msg = Parser().parsestr(text=msg_content)

But the Message object itself may be a MIMEMultipart object, which contains nested MIMEBase objects, and the nesting may be more than one layer. So we have to print out the hierarchy of the Message object recursively.

# variable indent_number is used to decide number of indent of each level in the mail multiple body part.
def print_info(msg, indent_number=0):
    """Print the headers and the body structure of an email message.

    msg: the message object to print (a parsed email or one of its parts).
    indent_number: nesting level of msg; 0 means the top-level message.
        Headers are printed only at level 0, and every printed line is
        prefixed with one space per level.

    Multipart messages are walked recursively. For text/plain and
    text/html parts the decoded text is printed; for any other part only
    its content type is printed.
    """
    prefix = ' ' * indent_number

    if indent_number == 0:
        # loop to retrieve from, to, subject from email header.
        for header in ['From', 'To', 'Subject']:
            # get header value
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    # decode the subject value
                    value = decode_str(value)
                else:
                    # parse email address into display name and address
                    hdr, addr = parseaddr(value)
                    # decode the name value.
                    name = decode_str(hdr)
                    # two placeholders are required for the two values.
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (prefix, header, value))

    if msg.is_multipart():
        # get multiple parts from message body and loop for each part.
        for n, part in enumerate(msg.get_payload()):
            print('%spart %s' % (prefix, n))
            print('%s--------------------' % prefix)
            # print multiple part information by invoke print_info function recursively.
            print_info(part, indent_number + 1)
    else:
        # get message content mime type
        content_type = msg.get_content_type()
        if content_type == 'text/plain' or content_type == 'text/html':
            # get email content (bytes after transfer decoding)
            content = msg.get_payload(decode=True)
            # get content string charset
            charset = guess_charset(msg)
            # Always decode to str: concatenating bytes with the '. '
            # suffix below would raise TypeError. Fall back to utf-8 when
            # the charset is missing or unknown to Python.
            try:
                content = content.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                content = content.decode('utf-8', errors='replace')
            print('%sText: %s' % (prefix, content + '. '))
        else:
            print('%sAttachment: %s' % (prefix, content_type))


# The Subject of the message or the name contained in the Email is encoded string
# , which must decode for it to display properly, this function just provide the feature.
def decode_str(s):
    """Return the first decoded fragment of header value s as a str.

    decode_header() yields (value, charset) pairs; only the first pair is
    used. value is bytes for encoded fragments and may also be bytes with
    no charset when the header mixes plain and encoded text, so every
    bytes value is decoded (utf-8 when no usable charset is given).
    """
    value, charset = decode_header(s)[0]
    if isinstance(value, bytes):
        try:
            value = value.decode(charset or 'utf-8', errors='replace')
        except LookupError:
            # charset name not known to Python
            value = value.decode('utf-8', errors='replace')
    return value

The decode_header() function returns a list of (value, charset) pairs, because a header field (for example Cc or Bcc with multiple mail addresses) may consist of several separately encoded parts, so multiple elements can be parsed out. In our code above we only take the first element.

Читайте также:  For files in folder python

The content of a text email is also an encoded string, so you need to detect its charset. Otherwise, emails that are not UTF-8 encoded cannot be displayed properly. The function below implements this feature.

# check email content string encoding charset.
def guess_charset(msg):
    """Return the charset name of msg as a lower-case str, or None.

    The charset explicitly set on the message object is preferred. When
    there is none, the charset parameter of the Content-Type header is
    used. Returns None when neither is available.
    """
    # get charset from message object.
    charset = msg.get_charset()
    if charset is not None:
        # get_charset() returns a Charset object; callers pass the result
        # to bytes.decode(), which needs the codec name as a str.
        return str(charset)
    # get_content_charset() reads the charset parameter of Content-Type
    # and handles quoting and additional parameters (e.g.
    # 'charset="utf-8"; format=flowed'), which plain string slicing does not.
    return msg.get_content_charset()

3. Question & Answer.

3.1 How to use a python script to read emails from pop3 server ( for example outlook) filtered by the from address.

  1. Use the Python poplib module to retrieve all the messages from the email server, then filter the messages on the client side, keeping those whose From address fits your needs.
import poplib
from email import message_from_bytes


def pop3_receive_email_by_from_address(from_address):
    """Print every message in the mailbox whose From header contains from_address.

    from_address: text to look for in the From header of each message.

    Connects to the POP3 server over SSL with the credentials defined
    below (placeholders), retrieves each message, and prints the message
    when its From header contains from_address; otherwise prints a notice.
    The connection is closed even when an error occurs.
    """
    user_name = 'user_name'
    passwd = '12345678910'
    pop3_server_domain = 'pop3.gmail.com'
    pop3_server_port = 995

    # Connect to pop3 email server.
    mail_box = poplib.POP3_SSL(pop3_server_domain, pop3_server_port)
    try:
        mail_box.user(user_name)
        mail_box.pass_(passwd)

        # Get number of existing emails.
        number_of_messages = len(mail_box.list()[1])

        # Loop in the all emails (POP3 message numbers start at 1).
        for i in range(number_of_messages):
            # retr() returns the message as a list of raw byte lines; join
            # and parse them to get an object that has headers.
            raw_lines = mail_box.retr(i + 1)[1]
            msg = message_from_bytes(b'\r\n'.join(raw_lines))

            # Get the email from address ('' when the header is missing).
            sender = str(msg.get('From') or '')

            # If the email from address contains the desired from address.
            if from_address in sender:
                print(msg)
            else:
                print('This message is not the one you want')
    finally:
        mail_box.quit()
# Import the python imaplib module.
import imaplib

# The imap server host address.
imap_server_host = 'mail.test.com'

# Connect to the imap server with SSL. IMAP4_SSL is encrypted from the
# start of the connection, so starttls() must not be called on it.
client = imaplib.IMAP4_SSL(host=imap_server_host)

# Login to the imap email server with provided username and password.
user_name = 'user_name'
passwd = 'password'
client.login(user_name, passwd)

# List the mailbox folders on the imap email server.
client.list()

# Select the INBOX mail folder.
client.select('INBOX')

# Search all the unread emails sent from the domain gmail.com.
# search() returns a (status, data) pair; keep it so the matching
# message numbers can be used afterwards.
status, message_numbers = client.search(None, 'UNSEEN HEADER FROM "gmail.com"')

Leave a Comment Cancel Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.

Читайте также:  Test my java applet

Источник

strayge / python_email_parse.py

This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters

import poplib
import email
from base64 import b64decode
# Connection settings (placeholders, replace with real values).
pop3_server = 'pop.gmail.com'
pop3_port = 995
username = 'XXXXXXXXXX@gmail.com'
password = 'XXXXXXXXXXXXXX'

# Open an SSL connection to the POP3 server and authenticate.
M = poplib.POP3_SSL(pop3_server, pop3_port)
M.user(username)
M.pass_(password)

# list()[1] holds one entry per message in the mailbox.
numMessages = len(M.list()[1])
def decode_header(header):
    """Return the header value decoded to a readable str.

    header: raw header value, possibly containing encoded words such as
        '=?utf-8?b?...?='. None is accepted and yields ''.

    All fragments of the header are decoded and joined. Fragments with no
    charset are treated as ASCII, and an unknown charset name also falls
    back to ASCII with undecodable bytes replaced.
    """
    # Imported here so the function does not depend on the email.header
    # submodule having been loaded as a side effect of other calls.
    from email.header import decode_header as _decode_fragments

    if header is None:
        return ''

    pieces = []
    for fragment, charset in _decode_fragments(header):
        if isinstance(fragment, bytes):
            try:
                fragment = fragment.decode(charset or 'ascii', errors='replace')
            except LookupError:
                fragment = fragment.decode('ascii', errors='replace')
        pieces.append(fragment)
    return ''.join(pieces)
# Retrieve every message, print its main headers and describe each part.
for i in range(numMessages):
    # retr() returns the message as a list of byte lines (POP3 message
    # numbers start at 1).
    raw_email = b"\n".join(M.retr(i + 1)[1])
    parsed_email = email.message_from_bytes(raw_email)
    print('=========== email #%i ============' % i)
    print('From:', parsed_email['From'])
    print('To:', parsed_email['To'])
    print('Date:', parsed_email['Date'])
    # A missing Subject header is None; pass '' so decoding cannot fail.
    print('Subject:', decode_header(parsed_email['Subject'] or ''))
    for part in parsed_email.walk():
        if part.is_multipart():
            # walk() also yields the sub-parts of this container, so the
            # container itself is skipped.
            continue
        elif part.get_content_maintype() == 'text':
            # Parts without a declared charset are decoded as utf-8.
            charset = part.get_content_charset() or 'utf-8'
            text = part.get_payload(decode=True).decode(charset, errors='replace')
            print('Text:\n', text)
        elif part.get_content_maintype() == 'application' and part.get_content_disposition() == 'attachment':
            # An attachment may have no filename; use '' in that case.
            name = decode_header(part.get_filename() or '')
            body = part.get_payload(decode=True)
            size = len(body)
            print('Attachment: "{}", size: {} bytes, starts with: "{}"'.format(name, size, body[:50]))
        else:
            print('Unknown part:', part.get_content_type())
    print('======== email #%i ended =========' % i)

# Close the POP3 session once all messages have been processed.
M.quit()

Источник

waleedahmad / read_pop3ssl_emails.py

This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters

import poplib , getpass , email
import re
# Open an SSL connection to the POP3 server and log in (placeholder
# host and credentials, replace with real values).
Mailbox = poplib.POP3_SSL('mail.privateemail.com')
Mailbox.user('user@domain.com')
Mailbox.pass_('pass')

# Addresses loaded from emails.txt; matching addresses are removed from it.
emails = []
# Number of inbox messages seen so far that did not match subject/month.
tries = 0
# Stop scanning once more than this many messages failed to match.
fail_tries = 300
# Text that must appear in the Subject header of a message of interest.
subject = "Undelivered Mail Returned to Sender"
# Text that must appear in the Date header of a message of interest.
month = "Jul 2018"
# Number of extracted addresses that were not present in emails.
not_found = 0
# Number of addresses originally read from emails.txt.
total_emails = 0
# Read dirty emails from emails.txt file
def read_emails():
    """Load the address list from emails.txt into the global emails list.

    Prints the subject/month filter and the failure limit in use, reads
    one stripped line per address from emails.txt, and records the number
    of lines read in the global total_emails.
    """
    global emails, total_emails
    print('Reading dirty emails. ')
    print('Script will exit after failing to read following Subject and Month\n'
          'in parsed emails ', fail_tries, 'times')
    print('Subject : ', subject, '\nMonth : ', month)
    with open('emails.txt', 'r') as f:
        emails = [line.strip() for line in f]
    total_emails = len(emails)
# Write filtered emails to clean_emails.txt file
def write_emails():
    """Write the global emails list to clean_emails.txt, one address per line."""
    global emails
    print('Writing clean emails to file. ')
    # The with-block closes the file even if a write fails. The loop
    # variable is not called "email" so it does not shadow the module name.
    with open("clean_emails.txt", "w") as out_file:
        out_file.writelines(address + "\n" for address in emails)
    print('Done!')
# Total inbox messages
def get_total_emails():
    """Return the number of messages listed by the POP3 server."""
    # list()[1] holds one entry per message.
    return len(Mailbox.list()[1])
# Parse emails from mail text and filter dirty emails
def filter_emails(total):
    """Remove addresses found in matching inbox messages from the emails list.

    total: number of messages in the mailbox; messages are read from the
        highest message number down to 1.

    A message matches when its Subject header contains the global subject
    text and its Date header contains the global month text. For a
    matching message, the first email address found in the body of its
    first part is removed from the global emails list; when the address is
    not in the list, the global not_found counter is incremented. Each
    non-matching message increments the global tries counter, and the scan
    stops once tries exceeds fail_tries.
    """
    global emails, tries, fail_tries, subject, month, not_found

    address_pattern = re.compile(r'[\w\.-]+@[\w\.-]+')

    for i in reversed(range(total)):
        raw_email = b"\n".join(Mailbox.retr(i + 1)[1])
        parsed_email = email.message_from_bytes(raw_email)

        # A missing header is None; use '' so the "in" tests cannot fail.
        subject_header = str(parsed_email["Subject"] or "")
        date_header = str(parsed_email["Date"] or "")

        if subject in subject_header and month in date_header:
            # Use the first part of a multipart message; a message that is
            # not multipart carries its body directly.
            payload = parsed_email.get_payload()
            if parsed_email.is_multipart() and payload:
                first_part = payload[0]
            else:
                first_part = parsed_email
            body = first_part.get_payload()

            match = address_pattern.search(body) if isinstance(body, str) else None
            if match is None:
                # No address in this message, so there is nothing to remove.
                continue
            remove_email = match.group(0)

            try:
                emails.remove(remove_email)
            except ValueError:
                # list.remove raises ValueError when the address is absent.
                not_found += 1
        else:
            tries += 1
            if tries > fail_tries:
                print('Exiting after failing', fail_tries, 'times')
                break
# Run the clean-up: load the addresses, drop the ones found in matching
# inbox messages, write the remaining ones, then print a summary.
read_emails()
filter_emails(get_total_emails())
# The mailbox is no longer needed once filtering is done.
Mailbox.quit()
write_emails()
print('Total Emails Provided : ', total_emails)
print('Filtered Emails : ', len(emails))
print('Emails not found in emails.txt file : ', not_found)

Источник

Оцените статью