forked from Yanivmd/TRACY
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathGraphletsCompareMyNG.py
250 lines (178 loc) · 7.4 KB
/
GraphletsCompareMyNG.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#-------------------------------------------------------------------------------
# Name: module1
# Purpose:
#
# Author: user
#
# Created: 22/08/2013
# Copyright: (c) user 2013
# Licence: <your licence>
#-------------------------------------------------------------------------------
#!/usr/bin/env python
from difflib import *
import myutils
import numpy
import re
#this is needed only for tests...
#import GraphletRewritter
from x86Analyzer import isRegisterStr,seperateCmd
printTrace = True
"""
we name the intel x86 cmd layout as written by IDA:
<OPCODE + describers> <param1>?<param2>?
example for describers:
mov ptr byte VS just mov
we grade cmd match with this logic:
if opcode NOT is exactly the same (OPCODE + describers) return 0
else
+2
then we set the params - for every member in the union group of tar and ref +1
"""
def __calcMaxCmdsGrade(refCmds):
return sum(map(lambda x:__gradeCmdMatch(x,x),refCmds))
def argumentToStr(argument):
if "[" in argument:
return "MEM"
if argument.startswith("offset"):
return "DataOffset"
if isRegisterStr(argument):
return "REG"
# a field is somthing seperated by " " , an argument is a list of params seperated by "," ; a member is a param seperated by "+"
def breakCommand(cmd):
#cmd = "mov [esp+28h+var_28_firstPush],offset aDHelo"
#cmd = "mov [esp+28h+var_28_firstPush],(offset aDHelo)"
cmdFields = seperateCmd(cmd)
operationWithArguments = cmdFields[0:-1]
if not "," in cmdFields[-1]:
return {'operation':operationWithArguments,'ParamsList':[cmdFields[-1]]}
params = cmdFields[-1].split(",")
operationWithArguments.extend(map(argumentToStr,params))
# the map will strip [ and ]
return {'operation':operationWithArguments,'ParamsList':map(lambda par:par[1:-1] if '[' in par else par,params)}
def __gradeCmdMatch(refCmd,tarCmd):
if refCmd == "" or tarCmd == "":
return 0
totalGrade = 2
refCmdInfo = breakCommand(refCmd)
tarCmdInfo = breakCommand(tarCmd)
if refCmdInfo['operation'] != tarCmdInfo['operation']:
return -1
for (refParam,tarParam) in zip(refCmdInfo['ParamsList'],tarCmdInfo['ParamsList']):
refSet = set(refParam.split("+"))
tarSet = set(tarParam.split("+"))
if len(tarSet) != len(refSet):
return -1
totalGrade += len(refSet.intersection(tarSet))
return totalGrade
# this will return (matchedCmds,deletedCmds,insertedCmds,grade)
# deleted is cmds we need to delete from tar (left overs not matched from tar)
# insert are cmds we need to insert into ref (left overs not matched in ref)
def __doGetMatchedCmds(refCmds,tarCmds,fillArrays=True):
res = myutils.iff(lambda x: x == [],refCmds, tarCmds)
if res == 1:
return ([],[],[],0)
elif res < 0:
return ([],tarCmds,[],0) if res == -1 else ([],[],refCmds,0)
# we have tarCmds rows, and refCmds columns
# where ar[0][1] is row 0 col 1
array = []
for i in range(0,len(tarCmds)):
array.append([0] * len(refCmds))
#fill last row
for i in range(0,len(refCmds)):
grade = __gradeCmdMatch(refCmds[i],tarCmds[-1])
array[-1][i] = {'my':grade,'best':grade}
# fill last column
for i in range(0,len(tarCmds)):
grade = __gradeCmdMatch(refCmds[-1],tarCmds[i])
array[i][-1] = {'my':grade,'best':grade}
# work on the rest of them
for i in range(len(tarCmds)-2,-1,-1):
for j in range(len(refCmds)-2,-1,-1):
grade = __gradeCmdMatch(refCmds[j],tarCmds[i]) + array[i+1][j+1]['best']
myMax = max(grade,array[i][j+1]['best'],array[i+1][j]['best'])
array[i][j] = {'my':grade,'best':myMax}
"""
print "ARRAY:"
for row in array:
for cell in row:
print cell,
print ";"
print "DONE"
"""
if fillArrays == False:
return array[0][0]['best']
#now we must walk on the array and find the used sequence
curI = 0
curJ = 0
matchedCmds = []
insertedCmds = []
deletedCmds = []
Iok = curI < len(tarCmds)
Jok = curJ < len(refCmds)
while Iok and Jok:
if Iok and Jok and (array[curI][curJ]['best'] == array[curI][curJ]['my']):
# the current one was chosen , +1 cuz here we are 0 based but i want to be 1 based
if array[curI][curJ]['best'] == 0:
if tarCmds[curI] == "":
deletedCmds.append(tarCmds[curI])
elif refCmds[curJ] == "":
insertedCmds.append(refCmds[curJ])
else:
matchedCmds.append({'ref':refCmds[curJ],'tar':tarCmds[curI],'tarCmdNum':curI+1,'refCmdNum':curJ+1,'operationMatch':array[curI][curJ]['best'] > 0})
curI+=1
Iok = curI < len(tarCmds)
curJ+=1
Jok = curJ < len(refCmds)
elif Jok and array[curI][curJ]['best'] == array[curI][curJ+1]['best']:
insertedCmds.append(refCmds[curJ])
curJ+=1
Jok = curJ < len(refCmds)-1
elif Iok:
deletedCmds.append(tarCmds[curI])
curI+=1
Iok = curI < len(tarCmds)-1
while Iok:
deletedCmds.append(tarCmds[curI])
curI+=1
Iok = curI < len(tarCmds)-1
while Jok:
insertedCmds.append(refCmds[curJ])
curJ+=1
Jok = curJ < len(refCmds)-1
return (matchedCmds,deletedCmds,insertedCmds,array[0][0]['best'])
def __getGradesDict(grade,refCmds,tarCmds):
maxRef = __calcMaxCmdsGrade(refCmds)
maxTar = __calcMaxCmdsGrade(tarCmds)
containment = (float(grade) / min(float(maxRef),float(maxTar))) * 100
ratio = (float(grade)*2.0 / float((maxRef+maxTar))) * 100
return {'ratio':ratio,'contain':containment}
# the next functions uses my method to grade graphlet match and get matched cmds
# return (matchedCmds,deletedCmds,insertedCmds)
def getMatchedCmdsWithGrade(refCode,tarCode):
res = myutils.iff(lambda x: x=="",refCode,tarCode)
if res==1:
return {'matchedCmds':[],'deletedCmds':[],'insertedCmds':[],'gradesDict':{'ratio':100,'contain':100},'refCode':refCode,'tarCode':tarCode}
if res==0:
refCmds = refCode.split(";")
tarCmds = tarCode.split(";")
(matchedCmds,deletedCmds,insertedCmds,grade) = __doGetMatchedCmds(refCmds,tarCmds)
return {'matchedCmds':matchedCmds,'deletedCmds':deletedCmds,'insertedCmds':insertedCmds,'gradesDict':__getGradesDict(grade, refCmds, tarCmds),'refCode':refCode,'tarCode':tarCode}
else:
return {'matchedCmds':[],'deletedCmds':[],'insertedCmds':[],'gradesDict':{'ratio':0,'contain':0},'refCode':refCode,'tarCode':tarCode}
def compareDisAsmCode(refCode,tarCode):
res = myutils.iff(lambda x: x=="",refCode,tarCode)
if res==1:
return {'ratio':100,'contain':100}
if res==0:
refCmds = refCode.split(";")
tarCmds = tarCode.split(";")
grade = __doGetMatchedCmds(refCmds,tarCmds,False)
return __getGradesDict(grade, refCmds, tarCmds)
else:
return {'ratio':0,'contain':0}
# this will return compare methods in order of precition, from less to most (presice)
def getCompareMethods():
yield compareDisAsmCode
if __name__ == '__main__':
print getMatchedCmdsWithGrade("mov [ebp+var_6C],0", "mov ecx,[edi+1Ch]")