diff --git a/sms_db_importer.py b/sms_db_importer.py index 6293336..c6a601c 100644 --- a/sms_db_importer.py +++ b/sms_db_importer.py @@ -1,40 +1,78 @@ -import argparse, sys, time, dateutil.parser, sqlite3, csv -debug = False -do_save = True #save the results? +import argparse, os, sys, time, dateutil.parser, sqlite3, csv, xml.dom.minidom +sms_debug = False +test_run = False #test = don't save results -def main(): +def sms_main(): parser = argparse.ArgumentParser(description='Import texts to android sms database file.') - inputgroup = parser.add_mutually_exclusive_group() - inputgroup.add_argument( "-csv", type=argparse.FileType('r'), help='input CSV file' ) - inputgroup.add_argument( "-iphone", type=str, help='input iPhone sms.db file' ) + parser.add_argument('infiles', nargs='+', type=argparse.FileType('r'), help='input files, may include multiple sources') parser.add_argument('outfile', type=str, help='output mmssms.db file use. Must alread exist.') - parser.add_argument('-d', action='store_true', dest='debug', help='extra info') - args = parser.parse_args()#"-iphone ../sms.db mmssms.db".split()) - global debug - debug = args.debug if args.debug else debug + parser.add_argument('-d', action='store_true', dest='sms_debug', help='sms_debug run: extra info, limits to 80, no save.') + parser.add_argument('-t', action='store_true', dest='test_run', help='Test run, no saving anything') + try: + args = parser.parse_args()#"-iphone ../sms.db mmssms.db".split()) + except IOError: + print "Problem opening file." + quit() - if args.csv: - starttime = time.time() - texts = readTextsFromCSV( args.csv ) - print "got all texts in {0} seconds, {1} items read".format( (time.time()-starttime), len(texts) ) - elif args.iphone: - starttime = time.time() - texts = readTextsFromIPhone( args.iphone ) - print "got all texts in {0} seconds, {1} items read".format( (time.time()-starttime), len(texts) ) - - exportAndroidSQL(texts, args.outfile) + #allow use of either the -d option or sms_debug=False + global sms_debug, test_run + sms_debug = args.sms_debug if args.sms_debug else sms_debug + test_run = args.test_run if args.test_run else test_run + + #get the texts into memory + texts = [] + for file in args.infiles: + starttime = time.time() #meause execution time + extension = os.path.splitext(file.name)[1] + if extension == ".csv": + print "Importing texts from Google Voice CSV file:" + new_texts = readTextsFromCSV( file ) + elif extension == ".db": + file.close() + if isIOS6db( file.name ): + print "Importing texts from iOS 6 database" + new_texts = readTextsFromIOS6( file.name ) + else: + print "Importing texts from iOS 4/5 database" + new_texts = readTextsFromIOS5( file.name ) + elif extension == ".xml": + print "Importing texts from backup XML file" + new_texts = readTextsFromXML( file ) + texts += new_texts + print "finished in {0} seconds, {1} messages read".format( (time.time()-starttime), len(new_texts) ) + + print "sorting all {0} texts by date".format( len(texts) ) + sorted(texts, key=lambda text: text.date) + + if os.path.splitext(args.outfile)[1] == '.db': + print "Saving changes into Android DB, "+str(args.outfile) + exportAndroidSQL(texts, args.outfile) + elif os.path.splitext(args.outfile)[1] == '.xml': + print "Saving changes into XML, "+str(args.outfile) + exportXML(texts, args.outfile) + else: + print "unrecognized output file." class Text: - def __init__( self, num, date, type, body, cid): + def __init__( self, num, date, type, body): self.num = num self.date = date self.type = type self.body = body - self.cid = cid def __str__(self): return "%s(%r)" % (self.__class__, self.__dict__) -def readTextsFromIPhone(file): +def cleanNumber(numb): + if not numb: + return False + stripped = ''.join(ch for ch in numb if ch.isalnum()) + if not stripped.isdigit(): + return False + return stripped[-10:] + +## Import functions ## + +def readTextsFromIOS6(file): conn = sqlite3.connect(file) c = conn.cursor() i=0 @@ -45,15 +83,49 @@ def readTextsFromIPhone(file): INNER JOIN handle ON message.handle_id = handle.ROWID \ ORDER BY message.ROWID ASC;') for row in query: - if debug and i > 80: + if sms_debug and i > 80: return i+=1 - txt = Text(row[0],long((row[1] + 978307200)*1000),(row[2]+1),row[3],row[4]) + txt = Text(row[0],long((row[1] + 978307200)*1000),(row[2]+1),row[3]) texts.append(txt) - if debug: + if sms_debug: print txt return texts + +def readTextsFromIOS5(file): + conn = sqlite3.connect(file) + c = conn.cursor() + i=0 + texts = [] + contactLookup = {} + query = c.execute( + 'SELECT is_madrid, madrid_handle, address, date, text, madrid_date_read, flags FROM message;') + for row in query: + if row[0]: + txt = Text( row[1], long((row[3] + 978307200)*1000), (row[5]==0)+1, row[4]) + else: + from_me = row[6] & 0x01 + txt = Text( row[2], long(row[3]*1000), from_me+1, row[4]) + + lookup_num = str(txt.num)[-10:] + if not lookup_num in contactLookup: + contactLookup[lookup_num] = i + txt.cid = contactLookup[lookup_num] + texts.append(txt) + + i+=1 + return texts +def readTextsFromXML(file): + texts = [] + dom = xml.dom.minidom.parse(file) + i = 0 + for sms in dom.getElementsByTagName("sms"): + txt = Text( sms.attributes['address'].value, sms.attributes['date'].value, + sms.attributes['type'].value, sms.attributes['body'].value) + texts.append(txt) + return texts + def readTextsFromCSV(file): inreader = csv.reader( file ) @@ -73,81 +145,120 @@ def readTextsFromCSV(file): texts = [] i=0 for row in inreader: - if debug and i > 80: - break #debug breaks early - txt = Text( row[phNumberIndex], #number long(float(dateutil.parser.parse(row[dateIndex]).strftime('%s.%f'))*1000), #date (2 if row[typeIndex]=='0' else 1), #type - row[bodyIndex], #body - row[cidIndex] ) #contact ID + row[bodyIndex] ) #body texts.append(txt) i += 1 return texts +def isIOS6db(file): + cur = sqlite3.connect(file).cursor() + cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='handle';") + res = cur.fetchone() is not None + cur.close() + return res + +## Export functions ## + def exportAndroidSQL(texts, outfile): #open resources conn = sqlite3.connect(outfile) c = conn.cursor() + #populate fast lookup table: + contactIdFromNumber = {} + query = c.execute('SELECT _id,address FROM canonical_addresses;') + for row in query: + contactIdFromNumber[cleanNumber(row[1])] = row[0] + #start the main loop through each message i=0 lastSpeed=0 lastCheckedSpeed=0 starttime = time.time() - convoMap = {} - - for txt in texts: - if debug and i > 80: - break #debug breaks early + for txt in texts: + if sms_debug and i > 80: + break #sms_debug breaks early + + clean_number = cleanNumber(txt.num) - #add a new conversation thread entry (and canonical_addresses lookup entry) if it doesn't exist - if not txt.cid in convoMap: + #add a new canonical_addresses lookup entry and thread item if it doesn't exist + if not clean_number in contactIdFromNumber: c.execute( "INSERT INTO canonical_addresses (address) VALUES (?)", [txt.num]) - contact_id = c.lastrowid - c.execute( "INSERT INTO threads (recipient_ids) VALUES (?)", [contact_id]) - convoMap[txt.cid] = c.lastrowid - - #now update conversation thread (assuming it was just created or existed before) - thread_id = convoMap[txt.cid] - c.execute( "UPDATE threads SET message_count=message_count + 1,snippet=?,'date'=? WHERE _id=? ", [txt.body,txt.date,thread_id] ) + contactIdFromNumber[clean_number] = c.lastrowid + c.execute( "INSERT INTO threads (recipient_ids) VALUES (?)", [contactIdFromNumber[clean_number]]) + contact_id = contactIdFromNumber[clean_number] + + #now update the conversation thread (happends with each new message) + c.execute( "UPDATE threads SET message_count=message_count + 1,snippet=?,'date'=? WHERE recipient_ids=? ", [txt.body,txt.date,contact_id] ) + c.execute( "SELECT _id FROM threads WHERE recipient_ids=? ", [contact_id] ) + thread_id = c.fetchone()[0] - if debug: - c.execute( "SELECT * FROM threads WHERE _id=?", [thread_id] ) + if sms_debug: + print "thread_id = "+ str(thread_id) + c.execute( "SELECT * FROM threads WHERE _id=?", [contact_id] ) print "updated thread: " + str(c.fetchone()) print "adding entry to message db: " + str([txt.num,txt.date,txt.body,thread_id,txt.type]) #add message to sms table c.execute( "INSERT INTO sms (address,'date',body,thread_id,read,type,seen) VALUES (?,?,?,?,1,?,1)", [txt.num,txt.date,txt.body,thread_id,txt.type]) - #print status - if i%100 == 0: - lastSpeed = int(100/(time.time() - lastCheckedSpeed)) + #print status (with fancy speed calculation) + recalculate_every = 100 + if i%recalculate_every == 0: + lastSpeed = int(recalculate_every/(time.time() - lastCheckedSpeed)) lastCheckedSpeed = time.time() - sys.stdout.write( "\rprocessed {0} entries, {1} convos, ({2} entries/sec)".format(i, len(convoMap), lastSpeed )) + sys.stdout.write( "\rprocessed {0} entries, {1} convos, ({2} entries/sec)".format(i, len(contactIdFromNumber), lastSpeed )) sys.stdout.flush() - i += 1 - - print "\nfinished in {0} seconds (average {1}/second)".format((time.time() - starttime), int(i/(time.time() - starttime))) - if debug: + if sms_debug: print "\n\nthreads: " for row in c.execute('SELECT * FROM threads'): print row - - if do_save and not debug: + if not test_run and not sms_debug: conn.commit() print "changes saved to "+outfile c.close() conn.close() +def exportXML(texts, outfile): + doc = xml.dom.minidom.Document() + doc.encoding = "UTF-8" + smses = doc.createElement("smses") + smses.setAttribute("count", str(len(texts))) + doc.appendChild(smses) + i=0 + for txt in texts: + sms = doc.createElement("sms") + #toa="null" sc_toa="null" service_center="null" read="1" status="-1" locked="0" date_sent="0" readable_date="Sep 27, 2012 10:57:55 AM" contact_name="Kevin Donlon" + sms.setAttribute("address", str(txt.num)) + sms.setAttribute("date", str(txt.date)) + sms.setAttribute("type", str(txt.type)) + sms.setAttribute("body", txt.body) + #useless things: + sms.setAttribute("read", "1") + sms.setAttribute("protocol", "0") + sms.setAttribute("status", "-1") + sms.setAttribute("locked", "0") + smses.appendChild(sms) + if (test_run or sms_debug) and i > 50: + break + i += 1 + if (test_run or sms_debug): + print "xml output: (cut short to 50 items and not written)" + print doc.toprettyxml(indent=" ", encoding="UTF-8") + else: + open(outfile, 'w').write(doc.toprettyxml(indent=" ", encoding="UTF-8")) + if __name__ == '__main__': - main() \ No newline at end of file + sms_main() \ No newline at end of file