diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index d70a1f1d4a..e0cc45f54e 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -45,6 +45,10 @@ jobs: git config user.name intelmq-bot git config user.email intelmq-bot + - name: "Fix images path" + run: | + sed -i 's|docs/static/images/|static/images/|' docs/index.md + - name: "Build docs without publishing" if: github.event_name == 'pull_request' run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index f5f33ab615..1351c9122a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,8 @@ (PR#2408 and PR#2414 by Jan Kaliszewski). - `intelmq.lib.upgrades`: Replace deprecated instances of `url2fqdn` experts by the new `url` expert in runtime configuration (PR#2432 by Sebastian Wagner). - `intelmq.lib.bot`: Ensure closing log files on reloading (PR#2435 by Kamil Mankowski). +- AMQP Pipeline: fix SSL context to create a client-side connection that verifies the server (PR by Kamil Mankowski). +- Only load the config once when starting intelmqctl (which makes IntelMQ API calls take less time) (PR#2444 by DigitalTrustCenter). ### Development - Makefile: Add codespell and test commands (PR#2425 by Sebastian Wagner). @@ -61,8 +63,20 @@ - Remove undocumented and unused attributes of `StompCollectorBot` instances: `ssl_ca_cert`, `ssl_cl_cert`, `ssl_cl_cert_key`. - Minor fixes/improvements and some refactoring (see also above: *Core*...). +- `intelmq.bots.collectors.amqp`: fix SSL context to create a client-side connection that verifies the server (PR by Kamil Mankowski). +- `intelmq.bots.collectors.shadowserver.collector_reports_api`: + - The 'json' option is no longer supported as the 'csv' option provides better performance (PR#2372 by elsif2). +- `intelmq.bots.collectors.alienvault_otx.collector` (PR#2449 by qux-bbb): + - Fix `modified_pulses_only` being always False. 
+ #### Parsers +- `intelmq.bots.parsers.netlab_360.parser`: Removed as the feed is discontinued. (#2442 by Filip Pokorný) +- `intelmq.bots.parsers.webinspektor.parser`: Removed as the feed is discontinued. (#2442 by Filip Pokorný) +- `intelmq.bots.parsers.sucuri.parser`: Removed as the feed is discontinued. (#2442 by Filip Pokorný) +- `intelmq.bots.parsers.shadowserver._config`: + - Switch to dynamic configuration to decouple report schema changes from IntelMQ releases by regularly downloading them from the Shadowserver server (PR#2372 by elsif2). +- `intelmq.bots.parsers.cymru`: Save current line. (PR by Kamil Mankowski) #### Experts - `intelmq.bots.experts.jinja` (PR#2417 by Mikk Margus Möll): @@ -90,6 +104,7 @@ - Try to reconnect on `NotConnectedException`. - `intelmq.bots.outputs.smtp_batch.output` (PR #2439 by Edvard Rejthar): - Fix ability to send with the default `bcc` +- `intelmq.bots.outputs.amqp`: fix SSL context to create a client-side connection that verifies the server (PR by Kamil Mankowski). ### Documentation - Add a readthedocs configuration file to fix the build fail (PR#2403 by Sebastian Wagner). @@ -97,6 +112,8 @@ - Update/fix/improve the stuff related to the STOMP bots and integration with the *n6*'s Stream API (PR#2408 and PR#2414 by Jan Kaliszewski). - Complete documentation overhaul. Change to markdown format. Uses the mkdocs-material (PR#2419 by Filip Pokorný). +- Adds warning banner if not browsing the latest version of the docs (PR#2445 by Filip Pokorný). +- Fix logo path in index.md when building the docs (PR#2445 by Filip Pokorný). ### Packaging - Add `pendulum` to suggested packages, as it is required for the sieve bot (PR#2424 by Sebastian Wagner). @@ -156,13 +173,11 @@ - added support for `Subject NOT LIKE` queries, - added support for multiple values in ticket subject queries. 
- `intelmq.bots.collectors.rsync`: Support for optional private key, relative time parsing for the source path, extra rsync parameters and strict host key checking (PR#2241 by Mateo Durante). -- `intelmq.bots.collectors.shadowserver.collector_reports_api`: - - The 'json' option is no longer supported as the 'csv' option provides better performance. #### Parsers - `intelmq.bots.parsers.shadowserver._config`: - Reset detected `feedname` at shutdown to re-detect the feedname on reloads (PR#2361 by @elsif2, fixes #2360). - - Switch to dynamic configuration to decouple report schema changes from IntelMQ releases. + - Switch to dynamic configuration to decouple report schema changes from IntelMQ releases. - Added 'IPv6-Vulnerable-Exchange' alias and 'Accessible-WS-Discovery-Service' report. (PR#2338) - Removed unused `p0f_genre` and `p0f_detail` from the 'DNS-Open-Resolvers' report. (PR#2338) - Added 'Accessible-SIP' report. (PR#2348) diff --git a/debian/cron.d/intelmq-update-database b/debian/cron.d/intelmq-update-database index b9223c2f71..b05e04fd48 100644 --- a/debian/cron.d/intelmq-update-database +++ b/debian/cron.d/intelmq-update-database @@ -3,14 +3,16 @@ # # m h dom mon dow command # Update database for tor_nodes bot, update frequency is unknown: -11 0 * * * intelmq intelmq.bots.experts.tor_nodes.expert --update-database +11 0 * * * intelmq intelmq.bots.experts.tor_nodes.expert --update-database # Update database for maxmind_geoip bot, updated every Tuesday: -30 23 * * 2,5 intelmq intelmq.bots.experts.maxmind_geoip.expert --update-database +30 23 * * 2,5 intelmq intelmq.bots.experts.maxmind_geoip.expert --update-database # Update database for asn_lookup bot, updated every two hours: -5 */2 * * * intelmq intelmq.bots.experts.asn_lookup.expert --update-database +5 */2 * * * intelmq intelmq.bots.experts.asn_lookup.expert --update-database # Update database for domain_suffix bot, updated daily: -45 1 * * * intelmq intelmq.bots.experts.domain_suffix.expert 
--update-database +45 1 * * * intelmq intelmq.bots.experts.domain_suffix.expert --update-database # Update database for recordedfuture_iprisk bot, update frequency is unknown: -27 1 * * * intelmq intelmq.bots.experts.recordedfuture_iprisk.expert --update-database +27 1 * * * intelmq intelmq.bots.experts.recordedfuture_iprisk.expert --update-database # Update database for domain_valid bot, updated daily: -50 1 * * * intelmq intelmq.bots.experts.domain_valid.expert --update-database \ No newline at end of file +50 1 * * * intelmq intelmq.bots.experts.domain_valid.expert --update-database +# Update schema for shadowserver parser bot: +02 1 * * * intelmq intelmq.bots.parsers.shadowserver.parser --update-schema diff --git a/docs/_overrides/main.html b/docs/_overrides/main.html new file mode 100644 index 0000000000..1d8f68edf7 --- /dev/null +++ b/docs/_overrides/main.html @@ -0,0 +1,11 @@ +{#SPDX-FileCopyrightText: 2023 Filip Pokorný#} +{#SPDX-License-Identifier: AGPL-3.0-or-later#} + +{% extends "base.html" %} + +{% block outdated %} + You're not viewing the latest version. + + Click here to go to latest. + +{% endblock %} diff --git a/docs/index.md b/docs/index.md index 79c3101745..cf097897c3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,7 +7,7 @@ [![CII Badge](https://bestpractices.coreinfrastructure.org/projects/4186/badge)](https://bestpractices.coreinfrastructure.org/projects/4186/) --> -![IntelMQ](/docs/static/images/Logo_Intel_MQ.svg) +![IntelMQ](docs/static/images/Logo_Intel_MQ.svg) # Introduction diff --git a/docs/user/bots.md b/docs/user/bots.md index 8c5e491714..791142dc20 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -930,8 +930,7 @@ oldest files available!). The resulting reports contain the following special field: -- `extra.file_name`: The name of the downloaded file, with fixed filename extension. The API returns file names with the - extension `.csv`, although the files are JSON, not CSV. 
Therefore, for clarity and better error detection in the parser, the file name in `extra.file_name` uses `.json` as extension. +- `extra.file_name`: The name of the downloaded file, with fixed filename extension. **Module:** `intelmq.bots.collectors.shadowserver.collector_reports_api` @@ -951,7 +950,7 @@ The resulting reports contain the following special field: **`types`** -(optional, string/array of strings) An array of strings (or a list of comma-separated values) with the names of report types you want to process. If you leave this empty, all the available reports will be downloaded and processed (i.e. 'scan', 'drones', 'intel', 'sandbox_connection', 'sinkhole_combined'). The possible report types are equivalent to the file names given in the section Supported Reports of the [Shadowserver parser](#intelmq.bots.parsers.shadowserver.parser). +(optional, string/array of strings) An array of strings (or a list of comma-separated values) with the names of report types you want to process. If you leave this empty, all the available reports will be downloaded and processed (i.e. 'scan', 'drones', 'intel', 'sandbox_connection', 'sinkhole_combined'). The possible report types are equivalent to the file names defined in the schema. Please see the Supported Reports of the [Shadowserver parser](#intelmq.bots.parsers.shadowserver.parser) for details. **Sample configuration** @@ -2128,23 +2127,23 @@ correct mapping of the columns: 1. **Automatic report type detection** - Since IntelMQ version 2.1 the parser can detect the feed based on metadata provided by the collector. - - When processing a report, this bot takes `extra.file_name` from the report and looks in `config.py` how the report - should be parsed. If this lookup is not possible, and the `feedname` is not given as parameter, the feed cannot be - parsed. - - The field `extra.file_name` has the following structure: `%Y-%m-%d-${report_name}[-suffix].csv` where the optional - suffix can be something like `country-geo`. 
For example, some possible filenames - are `2019-01-01-scan_http-country-geo.csv` or `2019-01-01-scan_tftp.csv`. The important part is the `report_name`, - between the date and the suffix. Since version 2.1.2 the date in the filename is optional, so filenames - like `scan_tftp.csv` are also detected. + Since IntelMQ version 2.1 the parser can detect the feed based on metadata provided by the collector. + + When processing a report, this bot takes `extra.file_name` from the report and looks in `config.py` how the report + should be parsed. If this lookup is not possible, and the `feedname` is not given as parameter, the feed cannot be + parsed. + + The field `extra.file_name` has the following structure: `%Y-%m-%d-${report_name}[-suffix].csv` where the optional + suffix can be something like `country-geo`. For example, some possible filenames + are `2019-01-01-scan_http-country-geo.csv` or `2019-01-01-scan_tftp.csv`. The important part is the `report_name`, + between the date and the suffix. Since version 2.1.2 the date in the filename is optional, so filenames + like `scan_tftp.csv` are also detected. 2. **Fixed report type** - If the method above is not possible and for upgraded instances, the report type can be set with the `feedname` - parameter. Report type is derived from the subject of Shadowserver e-mails. A list of possible values of - the `feedname` parameter can be found in the table below in the column "Report Type". + If the method above is not possible and for upgraded instances, the report type can be set with the `feedname` + parameter. Report type is derived from the subject of Shadowserver e-mails. A list of possible values of + the `feedname` parameter can be found in the table below in the column "Report Type". **Module:** @@ -2154,7 +2153,9 @@ correct mapping of the columns: **`feedname`** -(optional, string) Name of the Shadowserver report, see list below for possible values. +(optional, string) Name of the Shadowserver report. 
The value for each report type can be found in the schema `feed_name` field. + +For example using `curl -s https://interchange.shadowserver.org/intelmq/v1/schema | jq .[].feed_name`. **`overwrite`** @@ -2200,6 +2201,7 @@ The schema revision history is maintained at https://github.com/The-Shadowserver auto_update: true run_mode: continuous ``` + --- ### Shodan
diff --git a/docs/user/feeds.md b/docs/user/feeds.md index 8224336d8b..06c67fb468 100644 --- a/docs/user/feeds.md +++ b/docs/user/feeds.md @@ -1899,101 +1899,6 @@ module: intelmq.bots.parsers.microsoft.parser_ctip --- -## Netlab 360 - -### DGA - -This feed lists DGA family, Domain, Start and end of valid time(UTC) of a number of DGA families. - -**Public:** yes - -**Revision:** 2018-01-20 - -**Documentation:** - - -**Collector configuration** - -```yaml -module: intelmq.bots.collectors.http.collector_http -parameters: - http_url: http://data.netlab.360.com/feeds/dga/dga.txt - name: DGA - provider: Netlab 360 - rate_limit: 3600 -``` - -**Parser configuration** - -```yaml -module: intelmq.bots.parsers.netlab_360.parser -``` - ---- - - -### Hajime Scanner - -This feed lists IP address for know Hajime bots network. These IPs data are obtained by joining the DHT network and interacting with the Hajime node - -**Public:** yes - -**Revision:** 2019-08-01 - -**Documentation:** - - -**Collector configuration** - -```yaml -module: intelmq.bots.collectors.http.collector_http -parameters: - http_url: https://data.netlab.360.com/feeds/hajime-scanner/bot.list - name: Hajime Scanner - provider: Netlab 360 - rate_limit: 3600 -``` - -**Parser configuration** - -```yaml -module: intelmq.bots.parsers.netlab_360.parser -``` - ---- - - -### Magnitude EK - -This feed lists FQDN and possibly the URL used by Magnitude Exploit Kit. Information also includes the IP address used for the domain and last time seen. 
- -**Public:** yes - -**Revision:** 2018-01-20 - -**Documentation:** - - -**Collector configuration** - -```yaml -module: intelmq.bots.collectors.http.collector_http -parameters: - http_url: http://data.netlab.360.com/feeds/ek/magnitude.txt - name: Magnitude EK - provider: Netlab 360 - rate_limit: 3600 -``` - -**Parser configuration** - -```yaml -module: intelmq.bots.parsers.netlab_360.parser -``` - ---- - - ## OpenPhish ### Premium Feed @@ -2495,41 +2400,6 @@ module: intelmq.bots.parsers.github_feed --- -## Sucuri - -### Hidden IFrames - -Latest hidden iframes identified on compromised web sites. - -**Public:** yes - -**Revision:** 2018-01-28 - -**Documentation:** - -**Additional Information:** Please note that the parser only extracts the hidden iframes and the conditional redirects, not the encoded javascript. - - -**Collector configuration** - -```yaml -module: intelmq.bots.collectors.http.collector_http -parameters: - http_url: http://labs.sucuri.net/?malware - name: Hidden IFrames - provider: Sucuri - rate_limit: 86400 -``` - -**Parser configuration** - -```yaml -module: intelmq.bots.parsers.sucuri.parser -``` - ---- - - ## Surbl ### Malicious Domains @@ -2917,37 +2787,6 @@ parameters: --- -## WebInspektor - -### Unsafe sites - -Latest detected unsafe sites. 
- -**Public:** yes - -**Revision:** 2018-03-09 - - -**Collector configuration** - -```yaml -module: intelmq.bots.collectors.http.collector_http -parameters: - http_url: https://app.webinspector.com/public/recent_detections/ - name: Unsafe sites - provider: WebInspektor - rate_limit: 60 -``` - -**Parser configuration** - -```yaml -module: intelmq.bots.parsers.webinspektor.parser -``` - ---- - - ## ZoneH ### Defacements diff --git a/intelmq/bin/intelmqctl.py b/intelmq/bin/intelmqctl.py index 51301b1d82..9ad9266d08 100644 --- a/intelmq/bin/intelmqctl.py +++ b/intelmq/bin/intelmqctl.py @@ -87,6 +87,11 @@ def __init__(self, interactive: bool = False, returntype: ReturnType = ReturnTyp self._parameters.logging_handler = 'file' self._parameters.logging_path = DEFAULT_LOGGING_PATH + try: + self._runtime_configuration = utils.load_configuration(RUNTIME_CONF_FILE) + except ValueError as exc: # pragma: no cover + self.abort(f'Error loading {RUNTIME_CONF_FILE!r}: {exc}') + # Try to get logging_level from defaults configuration, else use default (defined above) defaults_loading_exc = None try: @@ -203,11 +208,6 @@ def __init__(self, interactive: bool = False, returntype: ReturnType = ReturnTyp intelmqctl debug --get-environment-variables ''' - try: - self._runtime_configuration = utils.load_configuration(RUNTIME_CONF_FILE) - except ValueError as exc: # pragma: no cover - self.abort(f'Error loading {RUNTIME_CONF_FILE!r}: {exc}') - self._processmanagertype = getattr(self._parameters, 'process_manager', 'intelmq') if self._processmanagertype not in process_managers(): self.abort('Invalid process manager given: %r, should be one of %r.' 
'' % (self._processmanagertype, list(process_managers().keys()))) @@ -384,7 +384,8 @@ def __init__(self, interactive: bool = False, returntype: ReturnType = ReturnTyp ) def load_defaults_configuration(self, silent=False): - for option, value in utils.get_global_settings().items(): + global_settings = self._runtime_configuration.get('global', {}) + for option, value in global_settings.items(): setattr(self._parameters, option, value) # copied from intelmq.lib.bot, should be refactored to e.g. intelmq.lib.config @@ -940,6 +941,12 @@ def check(self, no_connections=False, check_executables=True): check_logger.error('SyntaxError in bot %r: %r', bot_id, exc) retval = 1 continue + except AttributeError: + # if module does not exist, utils.get_bot_module_name returns None. import_module then raises + # AttributeError: 'NoneType' object has no attribute 'startswith' + check_logger.error('Incomplete installation: Bot %r not importable.', bot_id,) + retval = 1 + continue bot = getattr(bot_module, 'BOT') bot_parameters = copy.deepcopy(global_settings) bot_parameters.update(bot_config.get('parameters', {})) # the parameters field may not exist diff --git a/intelmq/bots/collectors/alienvault_otx/collector.py b/intelmq/bots/collectors/alienvault_otx/collector.py index 88e7528adc..171c487abc 100644 --- a/intelmq/bots/collectors/alienvault_otx/collector.py +++ b/intelmq/bots/collectors/alienvault_otx/collector.py @@ -26,10 +26,6 @@ def init(self): if OTXv2 is None: raise MissingDependencyError("OTXv2") - self.modified_pulses_only = False - if hasattr(self, 'modified_pulses_only'): - self.modified_pulses_only = self.modified_pulses_only - def process(self): self.logger.info("Downloading report through API.") otx = OTXv2(self.api_key, proxy=self.https_proxy) diff --git a/intelmq/bots/collectors/amqp/collector_amqp.py b/intelmq/bots/collectors/amqp/collector_amqp.py index 543f4e0bed..291aa1ecfc 100644 --- a/intelmq/bots/collectors/amqp/collector_amqp.py +++ 
b/intelmq/bots/collectors/amqp/collector_amqp.py @@ -55,7 +55,7 @@ def init(self): self.password) if self.use_ssl: - self.kwargs['ssl_options'] = pika.SSLOptions(context=ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)) + self.kwargs['ssl_options'] = pika.SSLOptions(context=ssl.create_default_context(ssl.Purpose.SERVER_AUTH)) self.connection_parameters = pika.ConnectionParameters( host=self.connection_host, diff --git a/intelmq/bots/outputs/amqptopic/output.py b/intelmq/bots/outputs/amqptopic/output.py index 68d1c366b1..03c0faba5e 100644 --- a/intelmq/bots/outputs/amqptopic/output.py +++ b/intelmq/bots/outputs/amqptopic/output.py @@ -64,7 +64,7 @@ def init(self): self.password) if self.use_ssl: - self.kwargs['ssl_options'] = pika.SSLOptions(context=ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)) + self.kwargs['ssl_options'] = pika.SSLOptions(context=ssl.create_default_context(ssl.Purpose.SERVER_AUTH)) self.connection_parameters = pika.ConnectionParameters( host=self.connection_host, diff --git a/intelmq/bots/parsers/cymru/parser_cap_program.py b/intelmq/bots/parsers/cymru/parser_cap_program.py index 94af6365f8..dc54c4565c 100644 --- a/intelmq/bots/parsers/cymru/parser_cap_program.py +++ b/intelmq/bots/parsers/cymru/parser_cap_program.py @@ -64,6 +64,7 @@ def parse(self, report): elif 'Data file written at' in line: self.parse_line = self.parse_line_old else: + self._current_line = line yield line def parse_bot_old(self, comment_split, report_type, event): diff --git a/intelmq/bots/parsers/netlab_360/__init__.py b/intelmq/bots/parsers/netlab_360/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/intelmq/bots/parsers/netlab_360/parser.py b/intelmq/bots/parsers/netlab_360/parser.py deleted file mode 100644 index 04017e942e..0000000000 --- a/intelmq/bots/parsers/netlab_360/parser.py +++ /dev/null @@ -1,70 +0,0 @@ -# SPDX-FileCopyrightText: 2016 jgedeon120 -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- 
-""" IntelMQ parser for Netlab 360 data feeds. """ - -from intelmq.lib.bot import ParserBot -from intelmq.lib.harmonization import DateTime - - -class Netlab360ParserBot(ParserBot): - """Parse the Netlab 360 DGA, Hajime, Magnitude and Mirai feeds""" - DGA_FEED = {'http://data.netlab.360.com/feeds/dga/dga.txt', - 'https://data.netlab.360.com/feeds/dga/dga.txt'} - MAGNITUDE_FEED = {'http://data.netlab.360.com/feeds/ek/magnitude.txt', - 'https://data.netlab.360.com/feeds/ek/magnitude.txt'} - MIRAI_SCANNER_FEED = {'http://data.netlab.360.com/feeds/mirai-scanner/scanner.list', - 'https://data.netlab.360.com/feeds/mirai-scanner/scanner.list'} - HAJIME_SCANNER_FEED = {'http://data.netlab.360.com/feeds/hajime-scanner/bot.list', - 'https://data.netlab.360.com/feeds/hajime-scanner/bot.list'} - - def parse_line(self, line, report): - if line.startswith('#') or not line.strip(): - self.tempdata.append(line) - - else: - value = line.split('\t') - event = self.new_event(report) - event.add('classification.identifier', value[0].lower()) - event.add('raw', line) - - if report['feed.url'] in Netlab360ParserBot.DGA_FEED: - event.add('source.fqdn', value[1]) - # DGA Feed format is - # DGA family, Domain, Start and end of valid time(UTC) - - event.add('time.source', value[2] + ' UTC') - if event['time.source'] > event['time.observation']: - event.change('time.source', event['time.observation']) - event.add('classification.type', 'c2-server') - event.add('event_description.url', 'http://data.netlab.360.com/dga') - - elif report['feed.url'] in Netlab360ParserBot.MAGNITUDE_FEED: - event.add('time.source', DateTime.from_timestamp(int(value[1]))) - event.add('source.ip', value[2]) - # ignore ips as fqdns - event.add('source.fqdn', value[3], raise_failure=False) - if value[4] != 'N/A': - event.add('source.url', value[4]) - event.add('classification.type', 'exploit') - event.add('event_description.url', 'http://data.netlab.360.com/ek') - elif report['feed.url'] in 
Netlab360ParserBot.MIRAI_SCANNER_FEED: - event.add('time.source', value[0] + ' UTC') - event.add('source.ip', value[1].replace('sip=', '')) - event.add('destination.port', value[2].replace('dport=', '')) - event.add('classification.type', 'scanner') - event.add('classification.identifier', 'mirai', overwrite=True) - elif report['feed.url'] in Netlab360ParserBot.HAJIME_SCANNER_FEED: - event.add('time.source', value[0] + 'T00:00:00 UTC') - event.add('source.ip', value[1].replace('ip=', '')) - event.add('classification.type', 'scanner') - event.add('classification.identifier', 'hajime', overwrite=True) - else: - raise ValueError('Unknown data feed %s.' % report['feed.url']) - - yield event - - -BOT = Netlab360ParserBot diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 6931e54109..6f3be4c145 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -126,7 +126,7 @@ def enable_auto_update(enable): __config.auto_update = enable -def get_feed_by_feedname(given_feedname: str) -> Optional[Dict[str, Any]]: +def get_feed_by_feedname(given_feedname: str) -> Optional[Tuple[str, Dict[str, Any]]]: return __config.feedname_mapping.get(given_feedname, None) @@ -290,6 +290,20 @@ def category_or_detail(value: str, row: Dict[str, str]) -> str: return row.get('detail', '') +def extract_cve_from_tag(tag: str) -> Optional[str]: + """ Returns a string with a sorted semicolon-separated list of CVEs or None if no CVE found in tag. 
""" + cveset = set() + tags = tag.split(";") + + for t in tags: + if re.match('^cve-[0-9]+-[0-9]+$', t): + cveset.add(t) + + if not (len(cveset)): + return None + return (';'.join(str(c) for c in sorted(cveset))) + + functions = { 'add_UTC_to_timestamp': add_UTC_to_timestamp, 'convert_bool': convert_bool, @@ -308,6 +322,7 @@ def category_or_detail(value: str, row: Dict[str, str]) -> str: 'scan_exchange_type': scan_exchange_type, 'scan_exchange_identifier': scan_exchange_identifier, 'category_or_detail': category_or_detail, + 'extract_cve_from_tag': extract_cve_from_tag, } diff --git a/intelmq/bots/parsers/shadowserver/parser.py b/intelmq/bots/parsers/shadowserver/parser.py index ec1908269a..e3d0eb2317 100644 --- a/intelmq/bots/parsers/shadowserver/parser.py +++ b/intelmq/bots/parsers/shadowserver/parser.py @@ -63,6 +63,7 @@ def init(self): if self.feedname is not None: self._sparser_config = config.get_feed_by_feedname(self.feedname) if self._sparser_config: + self._sparser_config = self._sparser_config[1] self.logger.info('Using fixed feed name %r for parsing reports.' % self.feedname) self._mode = 'fixed' else: diff --git a/intelmq/bots/parsers/sucuri/__init__.py b/intelmq/bots/parsers/sucuri/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/intelmq/bots/parsers/sucuri/parser.py b/intelmq/bots/parsers/sucuri/parser.py deleted file mode 100644 index 2a2bae9517..0000000000 --- a/intelmq/bots/parsers/sucuri/parser.py +++ /dev/null @@ -1,71 +0,0 @@ -# SPDX-FileCopyrightText: 2018 dargen3 -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- -""" -Only parses hidden iframes and conditional redirections, not Encoded javascript. 
-""" -import re -from html.parser import HTMLParser - -from intelmq.lib import utils -from intelmq.lib.bot import ParserBot - - -class MyHTMLParser(HTMLParser): - - lsData = "" - - def handle_data(self, data): - self.lsData = data - - -parser = MyHTMLParser() -remove_comments = re.compile(r"", re.DOTALL) - - -class SucuriParserBot(ParserBot): - """Parse the Sucuri Malware Hidden Iframes and Conditional redirections feeds""" - def process(self): - report = self.receive_message() - raw_report = utils.base64_decode(report["raw"]) # decoding - report_list = [row.strip() for row in raw_report.splitlines()] - index = 0 - actual_line = report_list[index] - while parser.lsData != "Hidden iframes": # displacement to target table - index += 1 - actual_line = report_list[index] - parser.feed(actual_line) - while actual_line[:8] != "": # scrabing table data - index += 1 - raw_actual_line = report_list[index] - actual_line = remove_comments.sub("", raw_actual_line).replace('.', '.') - if actual_line[:2] == "
- -
- PRICING - SUPPORT - LOGIN -
- -
- Home Notes Malware Signatures About
- -
- -
-
- -
-

Latest Malware Entries (24 hrs)

- -
- - -
-
-

We separate the data in three categories: Iframes, redirections and javascript. For each one you can click on the domain for more information, IP addresses and details on the malware.

- -

Hidden iframes

-

-Latest hidden iframes our scanner have identified on compromised web sites. - - - -
# of sites infectedTypeMalware / Domains
55iframehttp://poseyhumane6;org/stats.php
6iframehttp://zumobtr.ru/gate.;php?f=1041671
Limited view (40 rows)... Only the top entries being displayed.

- - -

Conditional redirections

-

-Conditional redirections we have detected (based on user agents or referers). - - - -
# of sites infectedTypeMalware / Domains
9redirectionshttp://goodhotwebmart.in/
6redirectionshttp://www.;mpzbearing.in/
5redirectionshttp://portal-d.pw/XcTyTp
Limited view (40 rows)... Only the top entries being displayed.

- -

Encoded javascript

-

-Encoded javascript (redirecting to blackhole and other exploit kits) or to build -a remote call. - - -
# of sites infectedTypeMalware / Domains
12javascripthttp://div-class-container.ru/m/": var a910ab1=[855,915,955,960,973,887,970,971,976,963,956,916.6;.
22javascript<script>var b="red";c="mod";function setCookie(a,b,c){var d=new Date;d.setTime(d.getTime()+60*c...
Limited view (40 rows)... Only the top entries being displayed.
-

- - -
- - - -
-
-
-
- - - - - - - - - - diff --git a/intelmq/tests/bots/parsers/sucuri/test_sucuri.data.license b/intelmq/tests/bots/parsers/sucuri/test_sucuri.data.license deleted file mode 100644 index 6a0d1638c4..0000000000 --- a/intelmq/tests/bots/parsers/sucuri/test_sucuri.data.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2018 dargen3 -SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/bots/parsers/sucuri/test_sucuri.py b/intelmq/tests/bots/parsers/sucuri/test_sucuri.py deleted file mode 100644 index bd8cd5a8c4..0000000000 --- a/intelmq/tests/bots/parsers/sucuri/test_sucuri.py +++ /dev/null @@ -1,66 +0,0 @@ -# SPDX-FileCopyrightText: 2018 dargen3 -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- -import codecs -import os -import unittest - -import intelmq.lib.test as test -from intelmq.bots.parsers.sucuri.parser import SucuriParserBot -from intelmq.lib import utils - -with codecs.open(os.path.join(os.path.dirname(__file__), 'test_sucuri.data'), encoding='UTF-8') as handle: - REPORT_DATA = handle.read() - REPORT_DATA_SPLIT = REPORT_DATA.splitlines() - -REPORT = {"__type": "Report", - "feed.name": "Sucuri Hidden Iframes", - "feed.url": "http://labs.sucuri.net/?malware", - "raw": utils.base64_encode(REPORT_DATA), - "time.observation": "2018-01-22T14:38:24+00:00", - } -EVENT1 = {"__type": "Event", - "classification.identifier": "hidden-iframe", - "classification.type": "blacklist", - "feed.name": "Sucuri Hidden Iframes", - "feed.url": "http://labs.sucuri.net/?malware", - "raw": utils.base64_encode(REPORT_DATA_SPLIT[616]), - "source.url": "http://poseyhumane.org/stats.php", - "time.observation": "2018-01-24T14:23:34+00:00", - } -EVENT2 = {"classification.identifier": "hidden-iframe", - "feed.url": "http://labs.sucuri.net/?malware", - "time.observation": "2018-01-24T15:58:48+00:00", - "__type": "Event", - "feed.name": "Sucuri Hidden Iframes", - "source.url": "http://zumobtr.ru/gate.php?f=1041671", - "raw": 
utils.base64_encode(REPORT_DATA_SPLIT[617]), - "classification.type": "blacklist", - } -EVENT3 = {'__type': 'Event', - 'classification.identifier': 'conditional-redirection', - 'classification.type': 'blacklist', - 'feed.name': 'Sucuri Hidden Iframes', - 'feed.url': 'http://labs.sucuri.net/?malware', - "raw": utils.base64_encode(REPORT_DATA_SPLIT[624]), - 'source.url': 'http://goodhotwebmart.in/', - } - - -class TestSucuriParserBot(test.BotTestCase, unittest.TestCase): - - @classmethod - def set_bot(cls): - cls.bot_reference = SucuriParserBot - cls.default_input_message = REPORT - - def test_event(self): - self.run_bot() - self.assertMessageEqual(0, EVENT1) - self.assertMessageEqual(1, EVENT2) - self.assertMessageEqual(2, EVENT3) - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/bots/parsers/webinspektor/__init__.py b/intelmq/tests/bots/parsers/webinspektor/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.data b/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.data deleted file mode 100644 index a42584c180..0000000000 --- a/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.data +++ /dev/null @@ -1,582 +0,0 @@ - - - - Recent Malware Detection by Web Inspector | Malware and Blacklisted Sites List - - - - - - - - - - - - - - - - - - - - -
-
- - - - -
- - - - -
- -

Unsafe sites

-

- This list contains some of websites which host malicious, suspicious content and malware files. It is updated every 60 mins. - These sites were checked in the last 24 hours. -

-

You may search website reports detected in the last 30 days.

- -
- - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Unsafe SiteResultLast Detected AtDetails
https://cummins.inhance.ioSuspicious2018-02-13 08:44:30 UTC - Suspicious pages were found on this site. -
-
http://naclkuso.myweb.hinet.netSuspicious2018-02-13 08:12:00 UTC - Suspicious pages were found on this site. -
-
http://wapmobi.sextgem.comSuspicious2018-02-13 08:11:46 UTC - Suspicious pages were found on this site. -
-
http://momentum.co.crSuspicious2018-02-13 08:11:29 UTC - Suspicious pages were found on this site. -
-
http://warning-2jusz4.streamSuspicious2018-02-13 08:11:26 UTC - Suspicious pages were found on this site. -
-
http://www.bhejacry.comSuspicious2018-02-13 08:11:07 UTC - Suspicious pages were found on this site. -
-
http://www.circuit-projects.comSuspicious2018-02-13 08:09:15 UTC - Suspicious pages were found on this site. -
-
http://www.coolangattafootballclub.com.a ...Suspicious2018-02-13 08:08:20 UTC - Suspicious pages were found on this site. -
-
http://drbarbarajames.comSuspicious2018-02-13 08:07:50 UTC - Suspicious pages were found on this site. -
-
http://larklands.netSuspicious2018-02-13 08:07:40 UTC - Suspicious pages were found on this site. -
-
http://m2jbdistribuidora.com.brSuspicious2018-02-13 08:07:35 UTC - Suspicious pages were found on this site. -
-
http://maven-aviation.comSuspicious2018-02-13 08:07:21 UTC - Suspicious pages were found on this site. -
-
http://authprwz.infoSuspicious2018-02-13 08:07:19 UTC - Suspicious pages were found on this site. -
-
http://monksstores.co.zaSuspicious2018-02-13 08:06:59 UTC - Suspicious pages were found on this site. -
-
http://atendimento-seguro.comunicadoimpo ...Suspicious2018-02-13 08:06:58 UTC - Suspicious pages were found on this site. -
-
http://nluxbambla.comSuspicious2018-02-13 08:06:52 UTC - Suspicious pages were found on this site. -
-
http://arubtrading.comSuspicious2018-02-13 08:06:34 UTC - Suspicious pages were found on this site. -
-
http://autoupdatenoreply61893124792830in ...Suspicious2018-02-13 08:06:23 UTC - Suspicious pages were found on this site. -
-
http://atendimento.acess.mobiSuspicious2018-02-13 08:06:22 UTC - Suspicious pages were found on this site. -
-
http://capitale-one-bank-login-secured.e ...Suspicious2018-02-13 08:06:22 UTC - Suspicious pages were found on this site. -
-
http://black.pkSuspicious2018-02-13 08:06:20 UTC - Suspicious pages were found on this site. -
-
http://asharna.comSuspicious2018-02-13 08:06:19 UTC - Suspicious pages were found on this site. -
-
http://niyiijaola.comSuspicious2018-02-13 08:06:07 UTC - Suspicious pages were found on this site. -
-
http://autosegurancabrasil.comSuspicious2018-02-13 08:06:02 UTC - Suspicious pages were found on this site. -
-
http://celebritygruop.comSuspicious2018-02-13 08:05:57 UTC - Suspicious pages were found on this site. -
-
http://caananlimited.comSuspicious2018-02-13 08:05:52 UTC - Suspicious pages were found on this site. -
-
http://banzaiwaterslides.netSuspicious2018-02-13 08:05:51 UTC - Suspicious pages were found on this site. -
-
http://beelinecg.comSuspicious2018-02-13 08:05:50 UTC - Suspicious pages were found on this site. -
-
http://bethmdesign.comSuspicious2018-02-13 08:05:35 UTC - Suspicious pages were found on this site. -
-
http://carlisassuranceameli24h.comSuspicious2018-02-13 08:05:31 UTC - Suspicious pages were found on this site. -
-
http://capev-ven.comSuspicious2018-02-13 08:05:27 UTC - Suspicious pages were found on this site. -
-
http://chicken2go.co.ukSuspicious2018-02-13 08:05:20 UTC - Suspicious pages were found on this site. -
-
http://www.appearantly.comSuspicious2018-02-13 08:05:19 UTC - Suspicious pages were found on this site. -
-
http://wembleyproductions.comSuspicious2018-02-13 08:05:12 UTC - Suspicious pages were found on this site. -
-
http://consultbm.co.zaSuspicious2018-02-13 08:05:00 UTC - Suspicious pages were found on this site. -
-
http://www.almanarahalelmeya.comSuspicious2018-02-13 08:04:58 UTC - Suspicious pages were found on this site. -
-
http://www.agencia.larue.com.brSuspicious2018-02-13 08:04:44 UTC - Suspicious pages were found on this site. -
-
http://www.bycod.comSuspicious2018-02-13 08:04:37 UTC - Suspicious pages were found on this site. -
-
http://conduceseguro.gob.mxSuspicious2018-02-13 08:04:32 UTC - Suspicious pages were found on this site. -
-
http://dofficepro.comSuspicious2018-02-13 08:04:31 UTC - Suspicious pages were found on this site. -
-
http://www.capa.com.brSuspicious2018-02-13 08:04:29 UTC - Suspicious pages were found on this site. -
-
http://estudiokgo.com.arSuspicious2018-02-13 08:04:23 UTC - Suspicious pages were found on this site. -
-
http://www.andrewrobertsllc.infoSuspicious2018-02-13 08:04:19 UTC - Suspicious pages were found on this site. -
-
http://dmpbmzbmtr8c40a.jonesnewsletter.c ...Suspicious2018-02-13 08:04:04 UTC - Suspicious pages were found on this site. -
-
http://farmaciasm3.clSuspicious2018-02-13 08:03:38 UTC - Suspicious pages were found on this site. -
-
http://devv-c0nfr1m.verifikasion1.gaSuspicious2018-02-13 08:03:33 UTC - Suspicious pages were found on this site. -
-
http://fbkepo.comSuspicious2018-02-13 08:03:27 UTC - Suspicious pages were found on this site. -
-
http://www.cleartaxsupport.comSuspicious2018-02-13 08:03:24 UTC - Suspicious pages were found on this site. -
-
http://www.delbox.com.brSuspicious2018-02-13 08:03:21 UTC - Suspicious pages were found on this site. -
-
http://cwaustralia.comSuspicious2018-02-13 08:03:15 UTC - Suspicious pages were found on this site. -
-
- - -
- - -
- - Display - -
- - - -
-
-
- - - - - diff --git a/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.data.license b/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.data.license deleted file mode 100644 index 6a0d1638c4..0000000000 --- a/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.data.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2018 dargen3 -SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.py b/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.py deleted file mode 100644 index 7e71c7b248..0000000000 --- a/intelmq/tests/bots/parsers/webinspektor/test_webinspektor.py +++ /dev/null @@ -1,69 +0,0 @@ -# SPDX-FileCopyrightText: 2018 dargen3 -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- -import os -import unittest - -import intelmq.lib.test as test -from intelmq.bots.parsers.webinspektor.parser import WebinspektorParserBot -from intelmq.lib import utils - -with open(os.path.join(os.path.dirname(__file__), 'test_webinspektor.data')) as handle: - REPORT_DATA = handle.read() - REPORT_DATA_SPLIT = REPORT_DATA.splitlines() - -REPORT = {"__type": "Report", - "feed.name": "Webinspektor", - "feed.url": "https://app.webinspector.com/public/recent_detections", - "raw": utils.base64_encode(REPORT_DATA), - "time.observation": "2018-01-22T14:38:24+00:00", - } -EVENT1 = {"raw": utils.base64_encode(REPORT_DATA_SPLIT[102].strip() + REPORT_DATA_SPLIT[104].strip()), - "__type": "Event", - "time.observation": "2018-01-22T14:38:24+00:00", - "feed.url": "https://app.webinspector.com/public/recent_detections", - "source.url": "https://cummins.inhance.io", - "classification.taxonomy": "other", - "classification.type": "blacklist", - "classification.identifier": "Suspicious", - "time.source": "2018-02-13T08:44:30+00:00", - "feed.name": "Webinspektor"} -EVENT2 = {"raw": utils.base64_encode(REPORT_DATA_SPLIT[111].strip() + REPORT_DATA_SPLIT[113].strip()), - "__type": 
"Event", - "time.observation": "2018-01-22T14:38:24+00:00", - "feed.url": "https://app.webinspector.com/public/recent_detections", - "source.url": "http://naclkuso.myweb.hinet.net", - "classification.taxonomy": "other", - "classification.type": "blacklist", - "classification.identifier": "Suspicious", - "time.source": "2018-02-13T08:12:00+00:00", - "feed.name": "Webinspektor"} -EVENT3 = {"raw": utils.base64_encode(REPORT_DATA_SPLIT[120].strip() + REPORT_DATA_SPLIT[122].strip()), - "__type": "Event", - "time.observation": "2018-01-22T14:38:24+00:00", - "feed.url": "https://app.webinspector.com/public/recent_detections", - "source.url": "http://wapmobi.sextgem.com", - "classification.taxonomy": "other", - "classification.identifier": "Suspicious", - "time.source": "2018-02-13T08:11:46+00:00", - "classification.type": "blacklist", - "feed.name": "Webinspektor"} - - -class TestWebinspektorParserBot(test.BotTestCase, unittest.TestCase): - - @classmethod - def set_bot(cls): - cls.bot_reference = WebinspektorParserBot - cls.default_input_message = REPORT - - def test_event(self): - self.run_bot() - self.assertMessageEqual(0, EVENT1) - self.assertMessageEqual(1, EVENT2) - self.assertMessageEqual(2, EVENT3) - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/lib/test_upgrades.py b/intelmq/tests/lib/test_upgrades.py index dfdfdca0a5..9d34ab39e9 100644 --- a/intelmq/tests/lib/test_upgrades.py +++ b/intelmq/tests/lib/test_upgrades.py @@ -574,6 +574,30 @@ }, } +V322_DISCONTINUED_BOTS_AND_FEEDS_IN = { + "global": {}, + "sucuri-parser": { + "module": "intelmq.bots.parsers.sucuri.parser" + }, + "webinspektor-parser": { + "module": "intelmq.bots.parsers.webinspektor.parser" + }, + "netlab360-parser": { + "module": "intelmq.bots.parsers.netlab_360.parser" + }, + "sucuri-collector": { + "module": "intelmq.bots.collectors.http.collector", + "parameters": { + "http_url": "http://labs.sucuri.net/?malware" + } + } +} + 
+V322_DISCONTINUED_BOTS_AND_FEEDS_OUT = """\ +Found discontinued bots: sucuri-parser, webinspektor-parser, netlab360-parser +Found discontinued feeds collected by bots: sucuri-collector +Remove the affected bots from the configuration.""" + def generate_function(function): def test_function(self): @@ -795,6 +819,11 @@ def test_v322_url_replacement(self): self.assertTrue(result[0]) self.assertEqual(V322_URL2FQN_OUT, result[1]) + def test_v322_removed_feeds_and_bots(self): + """ Test v322_removed_feeds_and_bots """ + result = upgrades.v322_removed_feeds_and_bots(V322_DISCONTINUED_BOTS_AND_FEEDS_IN, {}, False) + self.assertEqual(V322_DISCONTINUED_BOTS_AND_FEEDS_OUT, result[0]) + for name in upgrades.__all__: setattr(TestUpgradeLib, 'test_function_%s' % name, diff --git a/mkdocs.yml b/mkdocs.yml index c74b2b2474..80492559d3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,6 +19,7 @@ site_dir: docs_build theme: name: material logo: static/images/Logo_Intel_MQ.svg + custom_dir: docs/_overrides palette: # Palette toggle for light mode - scheme: default