forked from Sidnioulz/PolicyAnalysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFileStore.py
323 lines (280 loc) · 12.7 KB
/
FileStore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
"""Service to store File instances."""
from File import File, EventFileFlags
from utils import time2Str, debugEnabled
import os
import shutil
import sys
class FileStore(object):
    """A service to store File instances.

    Files are indexed twice: ``nameStore`` maps a name to the list of Files
    that held that name (in the order they were added), and ``inodeStore``
    maps an inode to a File.
    """

    __file_store = None

    @staticmethod
    def get():
        """Return the FileStore for the entire application."""
        if FileStore.__file_store is None:
            FileStore.__file_store = FileStore()
        return FileStore.__file_store

    @staticmethod
    def reset():
        """Forget the application-wide FileStore; get() builds a new one."""
        FileStore.__file_store = None

    def __init__(self):
        """Construct an empty FileStore."""
        super().__init__()
        self.clear()

    def __len__(self):
        """Return the number of Files in the FileStore (one per inode)."""
        return len(self.inodeStore)

    def __iter__(self):
        """Iterate over all Files, by sorted name then insertion order."""
        for name in sorted(self.nameStore):
            yield from self.nameStore[name]

    def clear(self):
        """Empty the FileStore."""
        self.nameStore = dict()   # type: dict
        self.inodeStore = dict()  # type: dict

    def getChildren(self, f: "File", time: int):
        """Get a File's direct children at the given time.

        The lookup prefix is the File's ``path`` followed by '/'; see
        getChildrenFromPath() for how :time: is interpreted.
        """
        return self.getChildrenFromPath(f.path + '/', time)

    def getChildrenFromPath(self, path: str, time: int):
        """Get the Files whose direct parent is :path:.

        At most one File is returned per name. Versions of a name are
        scanned in stored order: scanning stops at the first version that
        starts after :time: (unless :time: is -1), and the first version
        that has no end time, or ends at or after :time:, is selected.
        """
        children = []
        for name, versions in self.nameStore.items():
            if not name.startswith(path):
                continue

            # Only direct children: no further separator after the prefix.
            # NOTE(review): the slice skips one character past the prefix,
            # i.e. the first character of the child's own name when :path:
            # ends with '/' -- confirm this is intended.
            if '/' in name[len(path) + 1:]:
                continue

            for file in versions:
                tstart = file.getTimeOfStart()
                tend = file.getTimeOfEnd()

                if time != -1 and time < tstart:
                    break
                elif not tend or tend >= time:
                    children.append(file)
                    break

        return children

    def printFiles(self,
                   showDeleted: bool = False,
                   showCreationTime: bool = False,
                   showDocumentsOnly: bool = False,
                   userHome: str = None,
                   showDesignatedOnly: bool = False):
        """Print all the files currently being stored.

        Only the most recent File of each name is printed. Deleted files
        (those with an end time) are printed only if :showDeleted: is set.
        :showDocumentsOnly: only has an effect when :userHome: is given.
        """
        for key in sorted(self.nameStore, key=lambda s: s.lower()):
            files = self.nameStore[key]
            # A name may be left without any File; nothing to print then.
            if not files:
                continue
            last = files[-1]  # TODO handle multiple versions
            printpath = last.getName()

            # Print only files accessed by designation, if asked to
            if showDesignatedOnly:
                flags = EventFileFlags.designation
                if not last.getAccesses(flags):
                    continue

            # Print only user documents, if we have a home to compare to
            if showDocumentsOnly and userHome:
                if not last.isUserDocument(userHome, allowHiddenFiles=True):
                    continue

            # Ensure we print folders with a /, and files with leading space
            lastDir = printpath.rfind('/')
            if lastDir > 0:
                printpath = (lastDir + 1) * ' ' + printpath[lastDir + 1:]
                if last.isFolder():
                    printpath += "/"
            elif lastDir == 0 and last.isFolder():
                printpath += "/"

            # Non-deleted files
            if not last.getTimeOfEnd():
                if showCreationTime and last.getTimeOfStart():
                    print("%s\tCREATED on %s" % (
                           printpath,
                           time2Str(last.getTimeOfStart())))
                else:
                    print("%s" % printpath)
            # Deleted files, if the caller wants them too
            elif showDeleted:
                if showCreationTime and last.getTimeOfStart():
                    print("%s\tCREATED on %s, DELETED on %s" % (
                           printpath,
                           time2Str(last.getTimeOfStart()),
                           time2Str(last.getTimeOfEnd())))
                else:
                    print("%s\tDELETED on %s" % (
                           printpath,
                           time2Str(last.getTimeOfEnd())))

    def makeFiles(self,
                  outputDir: str,
                  showDeleted: bool = False,
                  showDocumentsOnly: bool = False,
                  userHome: str = None,
                  showDesignatedOnly: bool = False):
        """Make all the files currently being stored into a folder.

        Currently this only prepares :outputDir:: an existing output
        location is moved to '<outputDir>.backup' (replacing any previous
        backup) and a fresh, empty directory is created. Writing the Files
        themselves is disabled (see the NOTE below).

        Raises ValueError if :outputDir: is empty.
        """
        if not outputDir:
            raise ValueError("You must provide an output location to make "
                             "Files.")

        if os.path.exists(outputDir):
            backup = outputDir.rstrip("/") + ".backup"
            # rmtree() only accepts real directories; a stale backup that
            # is a regular file or a symlink must be unlinked instead.
            if os.path.isdir(backup) and not os.path.islink(backup):
                shutil.rmtree(backup)
            elif os.path.lexists(backup):
                os.remove(backup)
            os.replace(outputDir, backup)
        os.makedirs(outputDir, exist_ok=False)

        return  # NOTE: Disabled for performance reasons.

        # NOTE(review): everything below is unreachable while the early
        # return above is in place; it is kept so that the export can be
        # re-enabled.
        for key in sorted(self.nameStore, key=lambda s: s.lower()):
            files = self.nameStore[key]
            lastCnt = 0
            for last in reversed(files):
                if outputDir.endswith("/") or last.getName().startswith("/"):
                    outpath = outputDir + last.getName()
                else:
                    outpath = outputDir + "/" + last.getName()

                # Deal with previous versions
                if lastCnt:
                    outpath += ".prev.%d" % lastCnt
                lastCnt += 1

                # Print only files accessed by designation, if asked to
                if showDesignatedOnly:
                    flags = EventFileFlags.designation
                    if not last.getAccesses(flags):
                        continue

                # Print only user documents, if we have a home to compare to
                if showDocumentsOnly and userHome:
                    if not last.isUserDocument(userHome,
                                               allowHiddenFiles=True):
                        continue

                # Ensure all parent folders exist
                parentFileName = last.getParentName()
                if parentFileName:
                    parentPath = outputDir + '/' + parentFileName
                    try:
                        os.makedirs(parentPath, exist_ok=True)
                    except(FileExistsError) as e:
                        print("Warning: file '%s' aready exists, but is a "
                              "parent folder for file '%s'. Attempting to "
                              "delete the file and create a folder "
                              "instead..." % (parentFileName,
                                              last.getName()),
                              file=sys.stderr)
                        parentFiles = self.getFilesForName(parentFileName)
                        for parentFile in parentFiles:
                            if parentFile.getType() and not \
                                    parentFile.isFolder():
                                print("Warning: file '%s' already exists and"
                                      " is not a directory. Mime type: %s" %
                                      (parentFile.path, parentFile.getType()))
                            parentFile.setType('inode/directory')
                            self.updateFile(parentFile)
                        os.remove(parentPath)
                        os.makedirs(parentPath, exist_ok=False)
                        print("Info: updated %d files with name '%s'." % (
                               len(parentFiles), parentFile.getName()),
                              file=sys.stderr)

                if not last.getTimeOfEnd() or showDeleted:
                    if last.isFolder():
                        # Make the folder. If there's a file with the same
                        # name, that file was a folder and must be corrected.
                        os.makedirs(outpath, exist_ok=True)
                        with open(outpath+"/.ucl-metadata", 'a') as f:
                            os.utime(outpath+"/.ucl-metadata", None)
                            last.writeStatistics(f)
                    else:
                        try:
                            with open(outpath, 'a') as f:
                                os.utime(outpath, None)
                                last.writeStatistics(f)
                        # Seems to happen sometimes when a file was updated
                        # above.
                        except(IsADirectoryError) as e:
                            os.makedirs(outpath, exist_ok=True)
                            with open(outpath+"/.ucl-metadata", 'a') as f:
                                os.utime(outpath+"/.ucl-metadata", None)
                                last.writeStatistics(f)

    def getUserDocumentCount(self, userHome: str,
                             allowHiddenFiles: bool = False):
        """Return the number of user documents in the FileStore."""
        return sum(
            1 for f in self
            if f.isUserDocument(userHome, allowHiddenFiles=allowHiddenFiles))

    def getFilesForName(self, name: str):
        """Return all Files that have the given name as a path.

        Returns the stored list itself when the name is known, and a new
        empty list otherwise.
        """
        return self.nameStore.get(name, [])

    def getFile(self, inode: int):
        """Return the File identified by an inode, or None if unknown."""
        return self.inodeStore.get(inode)

    def updateFile(self, file: "File", oldName: str = None):
        """Replace the stored version of a File, optionally renaming it.

        The File is looked up by inode among the Files stored under
        :oldName: (or under its current name when :oldName: is not given).
        Without :oldName:, the stored entry is replaced in place. With
        :oldName:, the entry is removed from the old name and the File is
        added again under its current name via addFile().

        Raises ArithmeticError if no File with that inode is stored under
        the looked-up name, or if addFile() rejects the renamed File.
        """
        filesWithName = self.getFilesForName(oldName or file.getName())
        for index, old in enumerate(filesWithName):
            if old.inode != file.inode:
                continue

            if not oldName:
                filesWithName[index] = file
                # Keep the inode index pointing at the same object.
                self.inodeStore[file.inode] = file
            else:
                del filesWithName[index]
                # Do not leave an empty list behind for the old name:
                # code reading nameStore expects at least one File per key.
                if not filesWithName:
                    self.nameStore.pop(oldName, None)
                self.addFile(file)
            return

        raise ArithmeticError("Attempted to update file '%s' (made on %s)"
                              ", but it has not yet been added to the "
                              "store." % (file, file.getTimeOfStart()))

    def addFile(self, file: "File"):
        """Add a File to the FileStore.

        Files must be added in chronological order for a given name: the
        new File may not start before the last File of that name ended.

        Raises ArithmeticError if that ordering is violated.
        """
        name = file.getName()
        tstart = file.getTimeOfStart()
        filesWithName = self.getFilesForName(name)

        if filesWithName:
            # We must be the last for this name as the data must be sorted.
            # A falsy end time means "not ended" and never conflicts.
            lastEnd = filesWithName[-1].getTimeOfEnd() or 0
            if lastEnd > tstart:
                raise ArithmeticError("Newly inserted file '%s' (made on %s) "
                                      "not last. Followed by a file made on "
                                      "%s." % (name, tstart, lastEnd))
            filesWithName.append(file)
        else:
            self.nameStore[name] = [file]

        self.inodeStore[file.inode] = file

    def purgeFDReferences(self):
        """Remove FD references that weren't solved yet from the FileStore.

        Every name starting with "@fdref" is dropped from the name index,
        and the inodes of the Files stored under those names are dropped
        from the inode index.
        """
        purgedNames = [name for name in self.nameStore
                       if name.startswith("@fdref")]

        purgedInodes = set()
        for name in purgedNames:
            for f in self.nameStore.pop(name):
                purgedInodes.add(f.inode)

        for inode in purgedInodes:
            # Tolerate an inode missing from the inode index.
            self.inodeStore.pop(inode, None)

        if debugEnabled():
            print("Info: purged %d unresolved file descriptor references." %
                  len(purgedInodes))