From 38fa924a10dc924b97802b88e7b606afae97261b Mon Sep 17 00:00:00 2001 From: Tim O'Brien Date: Wed, 10 Oct 2012 13:43:14 -0700 Subject: [PATCH] Overhaul: iOS 4+5 support, XML in/out support. New interface. This is a huge change! Provide a list of files to import; it autodetects each file's format, merges the results, sorts them, and exports them to either XML or mmssms.db (sqlite). Use it just like cp: python sms_db_importer.py sms-iOS-5.db gv.csv android.xml out.xml Now imports: - iOS 5 databases, iMessages and all - iOS 6 databases (better architecture, actually) - Android XML from the com.riteshsahu.SMSBackupRestore app. - CSV of Google Voice data Now saves: - XML format for use with SMSBackupRestore - mmssms.db Android sms database format Requires adb root access. Why use this option? WAY faster and much better database performance. SMSBackupRestore is great, but it doesn't handle tens of thousands of messages. Fixes #1 where existing threads were not properly handled. --- sms_db_importer.py | 229 +++++++++++++++++++++++++++++++++------------ 1 file changed, 170 insertions(+), 59 deletions(-) diff --git a/sms_db_importer.py b/sms_db_importer.py index 6293336..c6a601c 100644 --- a/sms_db_importer.py +++ b/sms_db_importer.py @@ -1,40 +1,78 @@ -import argparse, sys, time, dateutil.parser, sqlite3, csv -debug = False -do_save = True #save the results? 
+import argparse, os, sys, time, dateutil.parser, sqlite3, csv, xml.dom.minidom +sms_debug = False +test_run = False #test = don't save results -def main(): +def sms_main(): parser = argparse.ArgumentParser(description='Import texts to android sms database file.') - inputgroup = parser.add_mutually_exclusive_group() - inputgroup.add_argument( "-csv", type=argparse.FileType('r'), help='input CSV file' ) - inputgroup.add_argument( "-iphone", type=str, help='input iPhone sms.db file' ) + parser.add_argument('infiles', nargs='+', type=argparse.FileType('r'), help='input files, may include multiple sources') parser.add_argument('outfile', type=str, help='output mmssms.db file use. Must alread exist.') - parser.add_argument('-d', action='store_true', dest='debug', help='extra info') - args = parser.parse_args()#"-iphone ../sms.db mmssms.db".split()) - global debug - debug = args.debug if args.debug else debug + parser.add_argument('-d', action='store_true', dest='sms_debug', help='sms_debug run: extra info, limits to 80, no save.') + parser.add_argument('-t', action='store_true', dest='test_run', help='Test run, no saving anything') + try: + args = parser.parse_args()#"-iphone ../sms.db mmssms.db".split()) + except IOError: + print "Problem opening file." 
+ quit() - if args.csv: - starttime = time.time() - texts = readTextsFromCSV( args.csv ) - print "got all texts in {0} seconds, {1} items read".format( (time.time()-starttime), len(texts) ) - elif args.iphone: - starttime = time.time() - texts = readTextsFromIPhone( args.iphone ) - print "got all texts in {0} seconds, {1} items read".format( (time.time()-starttime), len(texts) ) - - exportAndroidSQL(texts, args.outfile) + #allow use of either the -d option or sms_debug=False + global sms_debug, test_run + sms_debug = args.sms_debug if args.sms_debug else sms_debug + test_run = args.test_run if args.test_run else test_run + + #get the texts into memory + texts = [] + for file in args.infiles: + starttime = time.time() #meause execution time + extension = os.path.splitext(file.name)[1] + if extension == ".csv": + print "Importing texts from Google Voice CSV file:" + new_texts = readTextsFromCSV( file ) + elif extension == ".db": + file.close() + if isIOS6db( file.name ): + print "Importing texts from iOS 6 database" + new_texts = readTextsFromIOS6( file.name ) + else: + print "Importing texts from iOS 4/5 database" + new_texts = readTextsFromIOS5( file.name ) + elif extension == ".xml": + print "Importing texts from backup XML file" + new_texts = readTextsFromXML( file ) + texts += new_texts + print "finished in {0} seconds, {1} messages read".format( (time.time()-starttime), len(new_texts) ) + + print "sorting all {0} texts by date".format( len(texts) ) + sorted(texts, key=lambda text: text.date) + + if os.path.splitext(args.outfile)[1] == '.db': + print "Saving changes into Android DB, "+str(args.outfile) + exportAndroidSQL(texts, args.outfile) + elif os.path.splitext(args.outfile)[1] == '.xml': + print "Saving changes into XML, "+str(args.outfile) + exportXML(texts, args.outfile) + else: + print "unrecognized output file." 
class Text: - def __init__( self, num, date, type, body, cid): + def __init__( self, num, date, type, body): self.num = num self.date = date self.type = type self.body = body - self.cid = cid def __str__(self): return "%s(%r)" % (self.__class__, self.__dict__) -def readTextsFromIPhone(file): +def cleanNumber(numb): + if not numb: + return False + stripped = ''.join(ch for ch in numb if ch.isalnum()) + if not stripped.isdigit(): + return False + return stripped[-10:] + +## Import functions ## + +def readTextsFromIOS6(file): conn = sqlite3.connect(file) c = conn.cursor() i=0 @@ -45,15 +83,49 @@ def readTextsFromIPhone(file): INNER JOIN handle ON message.handle_id = handle.ROWID \ ORDER BY message.ROWID ASC;') for row in query: - if debug and i > 80: + if sms_debug and i > 80: return i+=1 - txt = Text(row[0],long((row[1] + 978307200)*1000),(row[2]+1),row[3],row[4]) + txt = Text(row[0],long((row[1] + 978307200)*1000),(row[2]+1),row[3]) texts.append(txt) - if debug: + if sms_debug: print txt return texts + +def readTextsFromIOS5(file): + conn = sqlite3.connect(file) + c = conn.cursor() + i=0 + texts = [] + contactLookup = {} + query = c.execute( + 'SELECT is_madrid, madrid_handle, address, date, text, madrid_date_read, flags FROM message;') + for row in query: + if row[0]: + txt = Text( row[1], long((row[3] + 978307200)*1000), (row[5]==0)+1, row[4]) + else: + from_me = row[6] & 0x01 + txt = Text( row[2], long(row[3]*1000), from_me+1, row[4]) + + lookup_num = str(txt.num)[-10:] + if not lookup_num in contactLookup: + contactLookup[lookup_num] = i + txt.cid = contactLookup[lookup_num] + texts.append(txt) + + i+=1 + return texts +def readTextsFromXML(file): + texts = [] + dom = xml.dom.minidom.parse(file) + i = 0 + for sms in dom.getElementsByTagName("sms"): + txt = Text( sms.attributes['address'].value, sms.attributes['date'].value, + sms.attributes['type'].value, sms.attributes['body'].value) + texts.append(txt) + return texts + def readTextsFromCSV(file): inreader = 
csv.reader( file ) @@ -73,81 +145,120 @@ def readTextsFromCSV(file): texts = [] i=0 for row in inreader: - if debug and i > 80: - break #debug breaks early - txt = Text( row[phNumberIndex], #number long(float(dateutil.parser.parse(row[dateIndex]).strftime('%s.%f'))*1000), #date (2 if row[typeIndex]=='0' else 1), #type - row[bodyIndex], #body - row[cidIndex] ) #contact ID + row[bodyIndex] ) #body texts.append(txt) i += 1 return texts +def isIOS6db(file): + cur = sqlite3.connect(file).cursor() + cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='handle';") + res = cur.fetchone() is not None + cur.close() + return res + +## Export functions ## + def exportAndroidSQL(texts, outfile): #open resources conn = sqlite3.connect(outfile) c = conn.cursor() + #populate fast lookup table: + contactIdFromNumber = {} + query = c.execute('SELECT _id,address FROM canonical_addresses;') + for row in query: + contactIdFromNumber[cleanNumber(row[1])] = row[0] + #start the main loop through each message i=0 lastSpeed=0 lastCheckedSpeed=0 starttime = time.time() - convoMap = {} - - for txt in texts: - if debug and i > 80: - break #debug breaks early + for txt in texts: + if sms_debug and i > 80: + break #sms_debug breaks early + + clean_number = cleanNumber(txt.num) - #add a new conversation thread entry (and canonical_addresses lookup entry) if it doesn't exist - if not txt.cid in convoMap: + #add a new canonical_addresses lookup entry and thread item if it doesn't exist + if not clean_number in contactIdFromNumber: c.execute( "INSERT INTO canonical_addresses (address) VALUES (?)", [txt.num]) - contact_id = c.lastrowid - c.execute( "INSERT INTO threads (recipient_ids) VALUES (?)", [contact_id]) - convoMap[txt.cid] = c.lastrowid - - #now update conversation thread (assuming it was just created or existed before) - thread_id = convoMap[txt.cid] - c.execute( "UPDATE threads SET message_count=message_count + 1,snippet=?,'date'=? WHERE _id=? 
", [txt.body,txt.date,thread_id] ) + contactIdFromNumber[clean_number] = c.lastrowid + c.execute( "INSERT INTO threads (recipient_ids) VALUES (?)", [contactIdFromNumber[clean_number]]) + contact_id = contactIdFromNumber[clean_number] + + #now update the conversation thread (happends with each new message) + c.execute( "UPDATE threads SET message_count=message_count + 1,snippet=?,'date'=? WHERE recipient_ids=? ", [txt.body,txt.date,contact_id] ) + c.execute( "SELECT _id FROM threads WHERE recipient_ids=? ", [contact_id] ) + thread_id = c.fetchone()[0] - if debug: - c.execute( "SELECT * FROM threads WHERE _id=?", [thread_id] ) + if sms_debug: + print "thread_id = "+ str(thread_id) + c.execute( "SELECT * FROM threads WHERE _id=?", [contact_id] ) print "updated thread: " + str(c.fetchone()) print "adding entry to message db: " + str([txt.num,txt.date,txt.body,thread_id,txt.type]) #add message to sms table c.execute( "INSERT INTO sms (address,'date',body,thread_id,read,type,seen) VALUES (?,?,?,?,1,?,1)", [txt.num,txt.date,txt.body,thread_id,txt.type]) - #print status - if i%100 == 0: - lastSpeed = int(100/(time.time() - lastCheckedSpeed)) + #print status (with fancy speed calculation) + recalculate_every = 100 + if i%recalculate_every == 0: + lastSpeed = int(recalculate_every/(time.time() - lastCheckedSpeed)) lastCheckedSpeed = time.time() - sys.stdout.write( "\rprocessed {0} entries, {1} convos, ({2} entries/sec)".format(i, len(convoMap), lastSpeed )) + sys.stdout.write( "\rprocessed {0} entries, {1} convos, ({2} entries/sec)".format(i, len(contactIdFromNumber), lastSpeed )) sys.stdout.flush() - i += 1 - - print "\nfinished in {0} seconds (average {1}/second)".format((time.time() - starttime), int(i/(time.time() - starttime))) - if debug: + if sms_debug: print "\n\nthreads: " for row in c.execute('SELECT * FROM threads'): print row - - if do_save and not debug: + if not test_run and not sms_debug: conn.commit() print "changes saved to "+outfile c.close() conn.close() 
+def exportXML(texts, outfile): + doc = xml.dom.minidom.Document() + doc.encoding = "UTF-8" + smses = doc.createElement("smses") + smses.setAttribute("count", str(len(texts))) + doc.appendChild(smses) + i=0 + for txt in texts: + sms = doc.createElement("sms") + #toa="null" sc_toa="null" service_center="null" read="1" status="-1" locked="0" date_sent="0" readable_date="Sep 27, 2012 10:57:55 AM" contact_name="Kevin Donlon" + sms.setAttribute("address", str(txt.num)) + sms.setAttribute("date", str(txt.date)) + sms.setAttribute("type", str(txt.type)) + sms.setAttribute("body", txt.body) + #useless things: + sms.setAttribute("read", "1") + sms.setAttribute("protocol", "0") + sms.setAttribute("status", "-1") + sms.setAttribute("locked", "0") + smses.appendChild(sms) + if (test_run or sms_debug) and i > 50: + break + i += 1 + if (test_run or sms_debug): + print "xml output: (cut short to 50 items and not written)" + print doc.toprettyxml(indent=" ", encoding="UTF-8") + else: + open(outfile, 'w').write(doc.toprettyxml(indent=" ", encoding="UTF-8")) + if __name__ == '__main__': - main() \ No newline at end of file + sms_main() \ No newline at end of file