Skip to content

Commit

Permalink
Added new working iter spout, tested on dev
Browse files Browse the repository at this point in the history
  • Loading branch information
fedelemantuano committed Mar 9, 2019
1 parent d3b0e24 commit 4c2f236
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 2 deletions.
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,12 @@ dist: clean ## builds source and wheel package

install: clean ## install the package to the active Python's site-packages
python setup.py install

# Run the `spamscope_debug_iter` topology through `sparse run` with the
# `debug` environment (-e) and the Storm options given with -o.
# /tmp/logs/ is created first; `mkdir -p` succeeds silently when the
# directory already exists but still fails loudly on real errors (e.g.
# permission denied) instead of masking them behind an "already exist" echo.
debug-iter-topology: ## run the debug iter topology with sparse
	mkdir -p /tmp/logs/
	sparse run \
		-n spamscope_debug_iter \
		-e debug \
		-o topology.max.spout.pending=1 \
		-o "topology.sleep.spout.wait.strategy.time.ms=10" \
		-o "topology.tick.tuple.freq.secs=10"
44 changes: 44 additions & 0 deletions conf/spamscope.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,50 @@ files-mails:
path_mails: /path/mails2


# Spout file on file system
# Use an iterator. Safe for RAM
iter-files-mails:

# Mails that have been in processing for longer than fail.after.seconds are marked as failed
fail.after.seconds: 60

# Post processing
post_processing:

# Move or remove the analyzed mails (default: remove)
what: remove

# Where to move the analyzed mails (default: /tmp/moved)
where: /tmp/moved

# Where to move the failed mails (default: /tmp/failed)
where.failed: /tmp/failed

# Mailboxes
mailboxes:
test:
mail_server: hostname
# Trust string is used to get sender IP address from mail server.
# More details:
# https://github.com/SpamScope/mail-parser/blob/v0.4.6/mailparser/__init__.py#L221
trust_string: "test_trust_string"
files_pattern: "*untroubled*"
path_mails: /path/mails1

# This flag enables Outlook msg parsing for every mail in the mailbox
# Default value is false
outlook: false

# List of other headers to get
headers:
- custom_one
- custom_two
test1:
mail_server: hostname
trust_string: "test1_trust_string"
files_pattern: "*"
path_mails: /path/mails2

# Bolts configurations
# Phishing bolt configuration
phishing:
Expand Down
13 changes: 13 additions & 0 deletions src/spouts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Overview
This folder contains all SpamScope `spouts`.

# How to add a new spout
These are the steps to add a new `spout` to SpamScope:

- add a new module to the [spouts](./) folder. This module should implement a new class that has `AbstractSpout` as its base class.

- import the new class in [__init__.py](./__init__.py)

- add a new section to the [main configuration file](../../conf/spamscope.example.yml). The name of this section will be used in the topology file

- add the new spout to a [topology](../../topologies)
1 change: 1 addition & 0 deletions src/spouts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
"""

from .files_mails import FilesMailSpout
from .iter_files_mails import IterFilesMailSpout
8 changes: 6 additions & 2 deletions src/spouts/iter_files_mails.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,16 @@ def iter_mails(self):
if v.get("outlook", False):
mail_type = MAIL_PATH_OUTLOOK

for mail in glob.iglob(os.path.join(path, pattern)):
mails = sorted(
glob.iglob(os.path.join(path, pattern)),
key=os.path.getmtime)

for mail in mails:
yield MailItem(
filename=mail,
mail_server=v["mail_server"],
mailbox=k,
priority=v["priority"],
priority=None,
trust=v["trust_string"],
mail_type=mail_type,
headers=v.get("headers", []))
Expand Down
77 changes: 77 additions & 0 deletions topologies/spamscope_debug_iter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Copyright 2017 Fedele Mantuano (https://twitter.com/fedelemantuano)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


from spouts import IterFilesMailSpout
from bolts import (Attachments, JsonMaker, Phishing, Tokenizer,
Urls, Network, RawMail, OutputDebug)
from streamparse import Grouping, Topology


# Debug topology built around the iterator-based file spout.
#
# Data flow, as wired below:
#   IterFilesMailSpout -> tokenizer -> {attachments, urls, phishing,
#   network, raw_mail} -> json_maker -> output-debug
#
# The analysis bolts and json_maker subscribe to their inputs with a
# fields grouping on 'sha256_random'.
# NOTE(review): presumably this keeps every piece of one mail on the same
# task so json_maker can reassemble it -- confirm against the bolts.
class OutputDebugTopology(Topology):

# Source of the mails. NOTE(review): the spec name presumably has to match
# the `iter-files-mails` section of the main configuration file -- verify.
files_spout = IterFilesMailSpout.spec(
name="iter-files-mails")

# First bolt: consumes the spout output directly. The bolts below read its
# named streams ('attachments', 'body', 'mail', 'network', 'raw_mail').
tokenizer = Tokenizer.spec(
name="tokenizer",
inputs=[files_spout],
par=1)

# Consumes only the tokenizer's 'attachments' stream.
attachments = Attachments.spec(
name="attachments",
inputs={tokenizer['attachments']: Grouping.fields('sha256_random')},
par=1)

# Consumes the attachments bolt output and the tokenizer's 'body' stream.
urls = Urls.spec(
name="urls",
inputs={
attachments: Grouping.fields('sha256_random'),
tokenizer['body']: Grouping.fields('sha256_random')})

# Consumes the attachments and urls bolt outputs plus the tokenizer's
# 'mail' stream.
phishing = Phishing.spec(
name="phishing",
inputs={
attachments: Grouping.fields('sha256_random'),
tokenizer['mail']: Grouping.fields('sha256_random'),
urls: Grouping.fields('sha256_random')})

# Consumes only the tokenizer's 'network' stream.
network = Network.spec(
name="network",
inputs={tokenizer['network']: Grouping.fields('sha256_random')},
par=1)

# Consumes only the tokenizer's 'raw_mail' stream.
raw_mail = RawMail.spec(
name="raw_mail",
inputs={tokenizer['raw_mail']: Grouping.fields('sha256_random')},
par=1)

# Fan-in point: subscribes to every analysis bolt above and to the
# tokenizer's 'mail' stream.
json_maker = JsonMaker.spec(
name="json_maker",
inputs={
attachments: Grouping.fields('sha256_random'),
network: Grouping.fields('sha256_random'),
phishing: Grouping.fields('sha256_random'),
raw_mail: Grouping.fields('sha256_random'),
tokenizer['mail']: Grouping.fields('sha256_random'),
urls: Grouping.fields('sha256_random')})

# Final bolt: receives the json_maker output (no explicit grouping given).
output_debug = OutputDebug.spec(
name="output-debug",
inputs=[json_maker])

0 comments on commit 4c2f236

Please sign in to comment.