-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimaptocsv.py
67 lines (58 loc) · 1.87 KB
/
imaptocsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Module to read out mail messages and write them into a CSV file.
"""
from imaplib import IMAP4_SSL
from datetime import date, timedelta
from time import strftime
import email
import dateutil.parser
import numpy as np
import pandas as pd
import configparser
import argparse
from mailworker import MailWorker
# Module-wide settings parser. The relative path 'config.ini' is resolved
# against the current working directory. ConfigParser.read() skips files it
# cannot open, so a missing file only shows up later as a KeyError when a
# section such as config['DATA'] is looked up.
config = configparser.ConfigParser()
config.read(filenames='config.ini')
def usefulHeaders():
    """Return the names of the mail fields that are exported.

    The script section of this module uses each name both as the key looked
    up on every fetched mail and as the column label of the resulting table,
    so the order here is the column order of the CSV output.

    Returns:
        A new list of field-name strings on every call; callers may mutate
        it freely.
    """
    header_names = (
        'Answered',
        'UID',
        'Auto-Submitted',  # whether its an auto-reply
        'Content-Language',
        'Date',
        'From',
        'To',
        # Both spellings are kept on purpose here: they are two distinct
        # columns in the output.
        'CC',
        'Cc',
        'Importance',
        'NewSubject',
        'NewMessageText',
    )
    return list(header_names)
def mails_to_frame(mails, headers):
    """Tabulate fetched mails into a string-typed DataFrame.

    Args:
        mails: Sized iterable of mail records; each record must support
            ``record[name]`` for every entry of ``headers``.
        headers: Column names, in output order.

    Returns:
        A ``pandas.DataFrame`` with one row per mail and one column per
        header, constructed with ``dtype=str``.
    """
    # An object array stores every looked-up value untouched; the string
    # conversion is left to pandas via ``dtype=str`` below.
    table = np.empty((len(mails), len(headers)), dtype='object')
    for row, mail in enumerate(mails):
        for col, name in enumerate(headers):
            table[row, col] = mail[name]
    return pd.DataFrame(table, columns=headers, dtype=str)


def main():
    """Fetch one data volume of mails and store it as a ';'-separated CSV.

    The optional positional command-line argument ``volume_number``
    (default 1) selects both what is fetched and where it is written:

    * ``1``: written to ``config['DATA']['recent_file']`` (file replaced).
    * ``2``: written to ``config['DATA']['primary_data_file']`` (file
      replaced, header row included).
    * anything else: appended to ``config['DATA']['primary_data_file']``
      without a header row.
    """
    parser = argparse.ArgumentParser(description='specify number of email data volume')
    # ``default=1`` replaces the former ``== None`` check; ``type=int``
    # already guarantees an int, so no further casts are needed.
    parser.add_argument('volume_number', type=int, nargs='?', default=1,
                        help='specify the data volume number')
    args = parser.parse_args()
    volume = args.volume_number
    print("VOLUME NUMBER: ", volume)

    mw = MailWorker()
    try:
        data, answereddata = mw.get_message_id_list(volume)
        mails = mw.fetch(data, answereddata)
    finally:
        # mailclose() is presumably the connection teardown (defined in the
        # project-local mailworker module); run it even when listing or
        # fetching raises so the session is not left open.
        mw.mailclose()
    print(len(mails), "mails in dataset")

    frame = mails_to_frame(mails, usefulHeaders())
    print("populated ndarray")

    if volume == 1:
        frame.to_csv(config['DATA']['recent_file'], sep=';', index=False)
    elif volume == 2:
        frame.to_csv(config['DATA']['primary_data_file'], sep=';', index=False)
    else:
        # Later volumes extend the primary file, so the header row must not
        # be repeated.
        frame.to_csv(config['DATA']['primary_data_file'], sep=';', mode='a',
                     header=False, index=False)


if __name__ == "__main__":
    main()