-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemail_scraper.py
44 lines (29 loc) · 1.24 KB
/
email_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import re
from datetime import datetime
from bs4 import BeautifulSoup
def scrape(message_id, mime_msg):
    """Find the largest GBP amount mentioned in an email body.

    The body is parsed as HTML and every text node is searched for a decimal
    number that is directly prefixed or suffixed by ``GBP`` or ``£``.

    Args:
        message_id: Identifier interpolated into the Gmail inbox link.
        mime_msg: A parsed email exposing the ``email.message.Message`` API
            (``is_multipart``, ``walk``, ``get_payload``, header lookup).

    Returns:
        A 4-tuple ``(amount, time, subject, email_link)`` where ``amount`` is
        the *negated* largest amount in hundredths of a pound, ``time`` is
        the parsed ``Date`` header, ``subject`` is the ``Subject`` header and
        ``email_link`` points at the message in the Gmail web UI.

        When the message has no usable body or no amount is found, the
        placeholder tuple ``(1, datetime.now(), "hi", "downloadmoreram.com")``
        is returned instead.

    Raises:
        TypeError: If an amount was found but the ``Date`` header is missing.
        ValueError: If the ``Date`` header cannot be parsed (on older Python
            versions the fallback parser may raise ``TypeError`` instead).
    """
    # Function-scope import so the block does not depend on a file-level
    # import being added elsewhere.
    from email.utils import parsedate_to_datetime

    # Locate the body. For multipart messages keep the last text/* part;
    # walk() also descends into nested containers (e.g. multipart/mixed
    # wrapping multipart/alternative), which a flat get_payload() loop misses.
    body = None
    if mime_msg.is_multipart():
        for part in mime_msg.walk():
            if part.get_content_maintype() == 'text':
                body = part.get_payload(decode=True)
    else:
        body = mime_msg.get_payload(decode=True)

    max_money = None
    if body is not None:
        regex_digits = r'\s*\d+\s*\.\s*\d+\s*'
        regex = r'(?:GBP|£)('+regex_digits+')|('+regex_digits+')(?:GBP|£)'
        amount_re = re.compile(regex)  # compiled once, reused below

        soup = BeautifulSoup(body, 'html.parser')
        for text in soup.find_all(string=amount_re):
            # find_all selects strings by *searching*, so the amount may sit
            # anywhere in the string (and there may be several); finditer
            # visits each one rather than only a match at position 0.
            for found in amount_re.finditer(text):
                raw = next(g for g in found.groups() if g is not None)
                # The pattern tolerates whitespace around the decimal point,
                # which float() does not, so strip all whitespace first.
                pounds = float(''.join(raw.split()))
                # round(), not int(): 19.99 * 100 == 1998.9999999999998.
                pence = round(pounds * 100)
                if max_money is None or pence > max_money:
                    max_money = pence

    if max_money is None:
        # Placeholder result kept unchanged for existing callers.
        return 1,datetime.now(),"hi","downloadmoreram.com"

    raw_date = mime_msg['date']
    try:
        time = datetime.strptime(raw_date, '%a, %d %b %Y %H:%M:%S %z')
    except ValueError:
        # Real-world Date headers often deviate from the strict format
        # (trailing "(UTC)" comment, missing weekday, named zones).
        time = parsedate_to_datetime(raw_date)

    subject = mime_msg['subject']
    email_link = "https://mail.google.com/mail/#inbox/{}".format(message_id)
    return -max_money, time, subject, email_link