Skip to content

Commit

Permalink
when checking if the tw was labeled before, use tw+ip combination, no…
Browse files Browse the repository at this point in the history
…t just tw number
  • Loading branch information
AlyaGomaa committed May 16, 2024
1 parent 3b737ae commit 5ca1b67
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 21 deletions.
30 changes: 27 additions & 3 deletions database/sqlite_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,14 @@ def set_gt_label_for_tw(

params = (ip, tw, label)
self.execute(query, params=params)

def is_valid_tool(self, tool: str) -> bool:
    """
    checks whether the given tool is one of suricata, ground_truth
    or slips. prints a message when it is not.
    :param tool: name of the tool to check
    :return: True if the tool is supported, False otherwise
    """
    supported = tool in ('suricata', 'ground_truth', 'slips')
    if not supported:
        print(f"{tool} not supported!!")
    return supported


def set_tool_label_for_tw(
self, ip: str, tool: str, tw: int, label: str):
"""
Expand All @@ -500,9 +507,8 @@ def set_tool_label_for_tw(
by the ground truth
:param label: malicious or benign
"""
if tool not in ['suricata', 'ground_truth', 'slips']:
print("TRYING TO STORE THE LABEL FOR AN INVALID TOOL!!")
return False
if not self.is_valid_tool(tool):
return

if not self.is_registered_timewindow(tw):
# tw wasn't seen by the gt.
Expand Down Expand Up @@ -594,7 +600,25 @@ def get_labels_flow_by_flow(self, by='all') -> Iterator[str]:
break
yield row

def was_tw_labeled_before(self, tw: int, ip: str, tool: str) -> bool:
    """
    tells whether the labels_per_tw table returns anything for the
    given tw and ip combination when selecting the given tool's
    label column.
    :param tw: tw number to check
    :param ip: str ip of the host to check
    :param tool: slips, suricata or ground_truth
    :return: False when the tool is not supported or when nothing
    was selected, True otherwise
    """
    if not self.is_valid_tool(tool):
        return False

    # NOTE(review): this condition is built by string interpolation;
    # confirm ip and tw never come from untrusted input, or switch
    # to a parameterized query if select() supports one
    lookup_condition = f"IP=='{ip}' AND timewindow=={tw}"
    stored_label = self.select(
        'labels_per_tw',
        f'{tool}_label',
        condition=lookup_condition,
        fetch='one'
    )
    # NOTE(review): presumably a row whose label column is NULL still
    # counts as truthy here -- verify what select() returns
    return bool(stored_label)

def get_flows_count(self, type_:str, label="") -> int:
"""
returns all the malicious/benign labeled flows by slips, suricata,
Expand Down
42 changes: 24 additions & 18 deletions parsers/ground_truth.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ def extract_fields(self, line: str) -> dict:

def register_timewindow(self, ts) -> dict:
"""
registers a new timewindow if the ts doesn't belong t an existing one
sets the current self.tw_number
registers a new timewindow if the ts doesn't belong to
an existing one.
:param ts: unix ts of the flow being parsed
returns the number of the registered tw and a bool indicating
whether the tw was registered before or not
Expand All @@ -250,6 +250,8 @@ def register_timewindow(self, ts) -> dict:

if self.is_first_flow:
self.is_first_flow = False
# first timestamp ever seen in the gt conn.log will be
# the start of tw1
self.twid_handler = TimewindowHandler(ts)
tw_number = 1
else:
Expand Down Expand Up @@ -285,34 +287,35 @@ def get_full_path(self, filename: str) -> str:
# this tool is given a zeek logfile and the path of it is abs
return filename


def was_tw_registered(self, tw: int) -> bool:
    """
    asks the db whether the given timewindow is registered.
    :param tw: tw number to check
    :return: whatever self.db.is_registered_timewindow() reports
    for this tw
    """
    already_registered = self.db.is_registered_timewindow(tw)
    return already_registered

def should_label_tw(self, tw_registration_stats: dict, label: str) -> (
def should_label_tw(self, tw: int, flow: dict) -> (
bool):
"""
determines whteher to label the tw or not if:
1. tw wasnt labeled before
2. tw was labeled before as benign and now the label is malicious
determines whether to label the tw or not if:
1. tw wasn't labeled before for the same IP
2. tw and ip were labeled before as benign and now the label is
malicious
if the tw was labeled before as malicious and now it's benign,
we don't update the label.
:param tw_registration_stats: fict with the following keys
tw: tw number
was_registered_before: bool indicating with whether the tw was
registered before in the db or not
:param label: the label we wanna set to the tw
:param tw: tw number
:param flow: dict with the srcip and label
:return: whether or not the current label of this tw should be
added to the db
"""
registered_b4 = tw_registration_stats["was_registered_before"]
if not registered_b4:
# first label for this tw
return True
labeled_b4 = self.db.was_tw_labeled_before(
tw, flow['srcip'], self.tool_name
)

if label == 'malicious':
if not labeled_b4:
# first label for this tw and this IP
return True

if flow['label'] == 'malicious':
return True
return False

Expand All @@ -326,7 +329,10 @@ def label_tw(self, flow: dict, tw_registration_stats: dict):
was_registered_before: bool indicating with whether the tw was
registered before in the db or not
"""
if not self.should_label_tw(tw_registration_stats, flow['label']):
if not self.should_label_tw(
tw_registration_stats["tw_number"],
flow
):
return False

self.db.set_gt_label_for_tw(
Expand Down

0 comments on commit 5ca1b67

Please sign in to comment.