From 4527da7e7fba6b48322c06b039b9dedf5eab8d34 Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Fri, 4 Feb 2022 13:42:48 +0100 Subject: [PATCH] FIX/ENH: HttpMixin refactored and various fixes **General** Removed 'requests' MissingDependencyError, because requests is a core dependency of intelmq Removed HTTP variables from Bot class in favor of HttpMixin Removed the try/except import of requests in pipeline; it is a core dependency of intelmq Added additional configuration variables to HttpMixin (from Bot class) **Bots** GitHub API is now using HttpMixin MS Azure Collector is now using HttpMixin DO-Portal Expert is now using HttpMixin GeoHash using MissingDependencyError instead of ValueError (consistency) HttpContentExpert is now using HttpMixin HttpStatusExpert is now using HttpMixin NationalCERTContactCertATExpert is now using HttpMixin RDAPExpert is now using HttpMixin RIPEExpert is now using HttpMixin SplunkSavedSearchExpert is now using HttpMixin TuencyExpert is now using HttpMixin RestAPIOutput is now using HttpMixin **Bot tests** GitHub API Collector is now using requests_mock instead of MagicMock (consistency) RestAPI Output is now using correct headers Fixes #2150 Fixes #2137 Signed-off-by: Sebastian Waldbauer --- CHANGELOG.md | 20 ++++++++ .../collectors/github_api/REQUIREMENTS.txt | 4 -- .../github_api/_collector_github_api.py | 16 ++---- .../collector_github_contents_api.py | 13 ++--- .../collectors/mail/collector_mail_url.py | 3 +- .../collectors/microsoft/collector_azure.py | 4 +- .../collectors/shodan/collector_stream.py | 5 +- intelmq/bots/experts/do_portal/expert.py | 20 ++------ intelmq/bots/experts/geohash/expert.py | 3 +- intelmq/bots/experts/http/expert_content.py | 7 ++- intelmq/bots/experts/http/expert_status.py | 6 +-- .../national_cert_contact_certat/expert.py | 14 ++---- intelmq/bots/experts/rdap/expert.py | 19 ++----- intelmq/bots/experts/ripe/expert.py | 23 ++------- .../experts/splunk_saved_search/expert.py | 19 ++-----
intelmq/bots/experts/tuency/expert.py | 8 +-- intelmq/bots/outputs/elasticsearch/output.py | 9 +++- intelmq/bots/outputs/restapi/output.py | 20 ++------ intelmq/lib/bot.py | 37 -------------- intelmq/lib/mixins/http.py | 19 +++---- intelmq/lib/pipeline.py | 8 +-- .../collectors/github_api/test_collector.py | 50 ++++++------------- .../tests/bots/experts/ripe/test_expert.py | 30 +++++------ .../tests/bots/outputs/restapi/test_output.py | 13 +++-- 24 files changed, 125 insertions(+), 245 deletions(-) delete mode 100644 intelmq/bots/collectors/github_api/REQUIREMENTS.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 934b9405f..e28ca07a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,12 @@ CHANGELOG The `LogLevel` and `ReturnType` Enums were added to `intelmq.lib.datatypes`. - `intelmq.lib.bot`: - Enhance behaviour if an unconfigured bot is started (PR#2054 by Sebastian Wagner). + - Remove `http_*` variables and move them into HTTPMixin (PR#2151 by Sebastian Waldbauer, fixes #2150). + - Remove `set_request_parameters` in favor of HTTPMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.lib.mixins.http`: + - Added missing variable types and simplified code (PR#2151 by Sebastian Waldbauer). +- `intelmq.lib.pipeline`: + - Removed the try/except import of requests; it is a core library specified in setup.py (PR#2151 by Sebastian Waldbauer). ### Development @@ -39,6 +45,10 @@ CHANGELOG #### Collectors - `intelmq.bots.collectors.mail._lib`: Add support for unverified SSL/STARTTLS connections (PR#2055 by Sebastian Wagner). +- `intelmq.bots.collectors.github_api`: Removed requests dependency in favor of HttpMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.collectors.microsoft.collector_azure`: Added HttpMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.collectors.shodan.collector_stream`: Removed `set_request_parameters()` in favor of HttpMixin (PR#2151 by Sebastian Waldbauer).
+ #### Parsers - `intelmq.bots.parsers.alienvault.parser_otx`: Save CVE data in `extra.cve` instead of `extra.CVE` due to the field name restriction on lower-case characters (PR#2059 by Sebastian Wagner). @@ -75,12 +85,21 @@ CHANGELOG - `intelmq.bots.experts.truncate_by_delimiter.expert`: Cut string if its length is higher than a maximum length (PR#1967 by Marius Karotkis). - `intelmq.bots.experts.remove_affix`: Remove prefix or postfix strings from a field (PR#1965 by Marius Karotkis). - `intelmq.bots.experts.asn_lookup.expert`: Fixes update-database script on the last few days of a month (PR#2121 by Filip Pokorný, fixes #2088). +- `intelmq.bots.experts.do_portal`: Removed requests dependency in favor of HTTPMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.experts.http.*`: Using HTTPMixin instead of `create_request_session` (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.experts.national_cert_contact_certat`: Using HttpMixin instead of `requests` library (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.experts.rdap`: Removed requests dependency & `create_request_session` in favor of HTTPMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.experts.ripe`: Simplified code & uses HTTPMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.experts.splunk_saved_search`: Simplified & uses HTTPMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.experts.tuency`: Removed `create_request_session` in favor of HTTPMixin (PR#2151 by Sebastian Waldbauer). #### Outputs - Removed `intelmq.bots.outputs.postgresql`: this bot was marked as deprecated in 2019 announced to be removed in version 3 of IntelMQ (PR#2045 by Birger Schacht). - Added `intelmq.bots.outputs.rpz_file.output` to create RPZ files (PR#1962 by Marius Karotkis). - Added `intelmq.bots.outputs.bro_file.output` to create Bro intel formatted files (PR#1963 by Marius Karotkis). 
- `intelmq.bots.outputs.templated_smtp.output`: Add new function `from_json()` (which just calls `json.loads()` in the standard Python environment), meaning the Templated SMTP output bot can take strings containing JSON documents and do the formatting itself (PR#2120 by Karl-Johan Karlsson). +- `intelmq.bots.outputs.elasticsearch`: Uses HttpMixin (PR#2151 by Sebastian Waldbauer). +- `intelmq.bots.outputs.restapi`: Using HttpMixin instead of importing `requests` (PR#2151 by Sebastian Waldbauer). ### Documentation - Feeds: Add documentation for newly supported dataplane feeds, see above (PR#2102 by Mikk Margus Möll). @@ -95,6 +114,7 @@ CHANGELOG - Also test on Python 3.10 (PR#2140 by Sebastian Wagner). - Switch from nosetests to pytest, as the former does not support Python 3.10 (PR#2140 by Sebastian Wagner). - CodeQL Github Actions `exponential backtracking on strings` fixed. (PR#2148 by Sebastian Waldbauer, fixes #2138) +- Replaced `MagicMock` & `patch` with `requests_mock` (PR#2151 by Sebastian Waldbauer). 
### Tools diff --git a/intelmq/bots/collectors/github_api/REQUIREMENTS.txt b/intelmq/bots/collectors/github_api/REQUIREMENTS.txt deleted file mode 100644 index 03fa26c75..000000000 --- a/intelmq/bots/collectors/github_api/REQUIREMENTS.txt +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-FileCopyrightText: 2019 Tomas Bellus -# SPDX-License-Identifier: AGPL-3.0-or-later - -requests diff --git a/intelmq/bots/collectors/github_api/_collector_github_api.py b/intelmq/bots/collectors/github_api/_collector_github_api.py index 545670d79..8c8e72866 100644 --- a/intelmq/bots/collectors/github_api/_collector_github_api.py +++ b/intelmq/bots/collectors/github_api/_collector_github_api.py @@ -7,13 +7,10 @@ GITHUB API Collector bot """ import base64 +from requests import exceptions from intelmq.lib.bot import CollectorBot - -try: - import requests -except ImportError: - requests = None +from intelmq.lib.mixins import HttpMixin static_params = { 'headers': { @@ -22,14 +19,11 @@ } -class GithubAPICollectorBot(CollectorBot): +class GithubAPICollectorBot(CollectorBot, HttpMixin): basic_auth_username = None basic_auth_password = None def init(self): - if requests is None: - raise ValueError('Could not import requests. 
Please install it.') - self.__user_headers = static_params['headers'] if self.basic_auth_username is not None and self.basic_auth_password is not None: self.__user_headers.update(self.__produce_auth_header(self.basic_auth_username, self.basic_auth_password)) @@ -47,13 +41,13 @@ def process_request(self): def github_api(self, api_path: str, **kwargs) -> dict: try: - response = requests.get(f"{api_path}", params=kwargs, headers=self.__user_headers) + response = self.http_get(api_path, headers=self.__user_headers, params=kwargs) if response.status_code == 401: # bad credentials raise ValueError(response.json()['message']) else: return response.json() - except requests.RequestException: + except exceptions.RequestException: raise ValueError(f"Unknown repository {api_path!r}.") @staticmethod diff --git a/intelmq/bots/collectors/github_api/collector_github_contents_api.py b/intelmq/bots/collectors/github_api/collector_github_contents_api.py index f98efd34b..d9faa8933 100644 --- a/intelmq/bots/collectors/github_api/collector_github_contents_api.py +++ b/intelmq/bots/collectors/github_api/collector_github_contents_api.py @@ -14,17 +14,14 @@ 'regex': file regex (DEFAULT = '*.json') """ import re +from requests import exceptions from intelmq.lib.exceptions import InvalidArgument from intelmq.bots.collectors.github_api._collector_github_api import GithubAPICollectorBot +from intelmq.lib.mixins import HttpMixin -try: - import requests -except ImportError: - requests = None - -class GithubContentsAPICollectorBot(GithubAPICollectorBot): +class GithubContentsAPICollectorBot(GithubAPICollectorBot, HttpMixin): "Collect files from a GitHub repository via the API. Optionally with GitHub credentials." 
regex: str = None # TODO: could be re repository: str = None @@ -62,7 +59,7 @@ def process_request(self): if item['extra'] != {}: report.add('extra.file_metadata', item['extra']) self.send_message(report) - except requests.RequestException as e: + except exceptions.RequestException as e: raise ConnectionError(e) def __recurse_repository_files(self, base_api_url: str, extracted_github_files: list = None) -> list: @@ -75,7 +72,7 @@ def __recurse_repository_files(self, base_api_url: str, extracted_github_files: elif github_file['type'] == 'file' and bool(re.search(self.regex, github_file['name'])): extracted_github_file_data = { 'download_url': github_file['download_url'], - 'content': requests.get(github_file['download_url']).content, + 'content': self.http_get(github_file['download_url']).content, 'extra': {} } for field_name in self.__extra_fields: diff --git a/intelmq/bots/collectors/mail/collector_mail_url.py b/intelmq/bots/collectors/mail/collector_mail_url.py index 9d13ea5a6..6dfe7f4cc 100644 --- a/intelmq/bots/collectors/mail/collector_mail_url.py +++ b/intelmq/bots/collectors/mail/collector_mail_url.py @@ -8,6 +8,7 @@ """ import io import re +from requests import exceptions from intelmq.lib.mixins import HttpMixin from intelmq.lib.splitreports import generate_reports @@ -50,7 +51,7 @@ def process_message(self, uid, message): self.logger.info("Downloading report from %r.", url) try: resp = self.http_get(url) - except requests.exceptions.Timeout: + except exceptions.Timeout: self.logger.error("Request timed out %i times in a row." 
% self.http_timeout_max_tries) erroneous = True diff --git a/intelmq/bots/collectors/microsoft/collector_azure.py b/intelmq/bots/collectors/microsoft/collector_azure.py index c33fc7d9b..282577a19 100644 --- a/intelmq/bots/collectors/microsoft/collector_azure.py +++ b/intelmq/bots/collectors/microsoft/collector_azure.py @@ -11,7 +11,7 @@ from intelmq.lib.bot import CollectorBot from intelmq.lib.exceptions import MissingDependencyError -from intelmq.lib.mixins import CacheMixin +from intelmq.lib.mixins import CacheMixin, HttpMixin try: from azure.storage.blob import ContainerClient @@ -23,7 +23,7 @@ create_configuration = None # noqa -class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin): +class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin, HttpMixin): "Fetch data blobs from a Microsoft Azure container" connection_string: str = "" container_name: str = "" diff --git a/intelmq/bots/collectors/shodan/collector_stream.py b/intelmq/bots/collectors/shodan/collector_stream.py index aeed4ff26..d09b95260 100644 --- a/intelmq/bots/collectors/shodan/collector_stream.py +++ b/intelmq/bots/collectors/shodan/collector_stream.py @@ -19,6 +19,7 @@ from typing import List from intelmq.lib.bot import CollectorBot +from intelmq.lib.mixins import HttpMixin try: import shodan @@ -27,7 +28,7 @@ shodan = None -class ShodanStreamCollectorBot(CollectorBot): +class ShodanStreamCollectorBot(CollectorBot, HttpMixin): "Collect the Shodan stream from the Shodan API" api_key: str = "" countries: List[str] = [] @@ -36,7 +37,7 @@ def init(self): if shodan is None: raise ValueError("Library 'shodan' is needed but not installed.") - self.set_request_parameters() + self.setup() if tuple(int(v) for v in pkg_resources.get_distribution("shodan").version.split('.')) <= (1, 8, 1): if self.proxy: raise ValueError('Proxies are given but shodan-python > 1.8.1 is needed for proxy support.') diff --git a/intelmq/bots/experts/do_portal/expert.py b/intelmq/bots/experts/do_portal/expert.py index 
fecf4224a..2101c83d6 100644 --- a/intelmq/bots/experts/do_portal/expert.py +++ b/intelmq/bots/experts/do_portal/expert.py @@ -8,27 +8,18 @@ a "502 Bad Gateway" status code is treated the same as a timeout, i.e. will be retried instead of a fail. """ -try: - import requests -except ImportError: - requests = None - +from intelmq.lib.mixins import HttpMixin import intelmq.lib.utils as utils from intelmq.lib.bot import ExpertBot -class DoPortalExpertBot(ExpertBot): +class DoPortalExpertBot(ExpertBot, HttpMixin): """Retrieve abuse contact information for the source IP address from a do-portal instance""" mode: str = "append" portal_api_key: str = None portal_url: str = None def init(self): - if requests is None: - raise ValueError("Library 'requests' could not be loaded. Please install it.") - - self.set_request_parameters() - self.url = self.portal_url + '/api/1.0/ripe/contact?cidr=%s' self.http_header.update({ "Content-Type": "application/json", @@ -36,12 +27,7 @@ def init(self): "API-Authorization": self.portal_api_key }) - self.session = utils.create_request_session(self) - retries = requests.urllib3.Retry.from_int(self.http_timeout_max_tries) - retries.status_forcelist = [502] - adapter = requests.adapters.HTTPAdapter(max_retries=retries) - self.session.mount('http://', adapter) - self.session.mount('https://', adapter) + self.session = self.http_session() def process(self): event = self.receive_message() diff --git a/intelmq/bots/experts/geohash/expert.py b/intelmq/bots/experts/geohash/expert.py index c5c9ba724..e7a3b8fef 100644 --- a/intelmq/bots/experts/geohash/expert.py +++ b/intelmq/bots/experts/geohash/expert.py @@ -9,6 +9,7 @@ https://github.com/joyanujoy/geolib ''' from intelmq.lib.bot import ExpertBot +from intelmq.lib.exceptions import MissingDependencyError try: from geolib import geohash @@ -23,7 +24,7 @@ class GeohashExpertBot(ExpertBot): def init(self): if not geohash: - raise ValueError("Library 'geolib' is required, please install it.") + raise 
MissingDependencyError("geolib") def process(self): event = self.receive_message() diff --git a/intelmq/bots/experts/http/expert_content.py b/intelmq/bots/experts/http/expert_content.py index e7521e965..7c1f57c79 100644 --- a/intelmq/bots/experts/http/expert_content.py +++ b/intelmq/bots/experts/http/expert_content.py @@ -7,10 +7,10 @@ from typing import List from intelmq.lib.bot import ExpertBot -from intelmq.lib.utils import create_request_session +from intelmq.lib.mixins import HttpMixin -class HttpContentExpertBot(ExpertBot): +class HttpContentExpertBot(ExpertBot, HttpMixin): """ Test if a given string is part of the content for a given URL @@ -29,8 +29,7 @@ class HttpContentExpertBot(ExpertBot): __session = None def init(self): - self.set_request_parameters() - self.__session = create_request_session(self) + self.__session = self.http_session() def process(self): event = self.receive_message() diff --git a/intelmq/bots/experts/http/expert_status.py b/intelmq/bots/experts/http/expert_status.py index ef920dca8..d6bfeb0a1 100644 --- a/intelmq/bots/experts/http/expert_status.py +++ b/intelmq/bots/experts/http/expert_status.py @@ -8,9 +8,10 @@ from intelmq.lib.bot import ExpertBot from intelmq.lib.utils import create_request_session +from intelmq.lib.mixins import HttpMixin -class HttpStatusExpertBot(ExpertBot): +class HttpStatusExpertBot(ExpertBot, HttpMixin): """ Fetch the HTTP Status for a given URL @@ -31,8 +32,7 @@ def process(self): event = self.receive_message() if self.field in event: - self.set_request_parameters() - session = create_request_session(self) + session = self.http_session() try: response = session.get(event[self.field]) diff --git a/intelmq/bots/experts/national_cert_contact_certat/expert.py b/intelmq/bots/experts/national_cert_contact_certat/expert.py index b7a7035e2..4eb084a88 100644 --- a/intelmq/bots/experts/national_cert_contact_certat/expert.py +++ b/intelmq/bots/experts/national_cert_contact_certat/expert.py @@ -20,30 +20,22 @@ """ from 
intelmq.lib.bot import ExpertBot +from intelmq.lib.mixins import HttpMixin from intelmq.lib.utils import create_request_session from intelmq.lib.exceptions import MissingDependencyError -try: - import requests -except ImportError: - requests = None - URL = 'https://contacts.cert.at/cgi-bin/abuse-nationalcert.pl' -class NationalCERTContactCertATExpertBot(ExpertBot): +class NationalCERTContactCertATExpertBot(ExpertBot, HttpMixin): """Add country and abuse contact information from the CERT.at national CERT Contact Database. Set filter to true if you want to filter out events for Austria. Set overwrite_cc to true if you want to overwrite an existing country code value""" filter: bool = False http_verify_cert: bool = True overwrite_cc: bool = False def init(self): - if requests is None: - raise MissingDependencyError("requests") - - self.set_request_parameters() - self.session = create_request_session(self) + self.session = self.http_session() def process(self): event = self.receive_message() diff --git a/intelmq/bots/experts/rdap/expert.py b/intelmq/bots/experts/rdap/expert.py index a786b87f8..44aea579c 100644 --- a/intelmq/bots/experts/rdap/expert.py +++ b/intelmq/bots/experts/rdap/expert.py @@ -3,18 +3,13 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- +import requests from intelmq.lib.bot import ExpertBot from intelmq.lib.utils import create_request_session -from intelmq.lib.exceptions import MissingDependencyError -from intelmq.lib.mixins import CacheMixin +from intelmq.lib.mixins import CacheMixin, HttpMixin -try: - import requests -except ImportError: - requests = None - -class RDAPExpertBot(ExpertBot, CacheMixin): +class RDAPExpertBot(ExpertBot, CacheMixin, HttpMixin): """ Get RDAP data""" rdap_order: list = ['abuse', 'technical', 'administrative', 'registrant', 'registrar'] rdap_bootstrapped_servers: dict = {} @@ -30,11 +25,7 @@ class RDAPExpertBot(ExpertBot, CacheMixin): __session: requests.Session def init(self): - if requests is None: 
- raise MissingDependencyError("requests") - - self.set_request_parameters() - self.__session = create_request_session(self) + self.__session = self.http_session() # get overall rdap data from iana resp = self.__session.get('https://data.iana.org/rdap/dns.json') @@ -73,7 +64,7 @@ def process(self): if result: event.add('source.abuse_contact', result, overwrite=self.overwrite) else: - self.__session = create_request_session(self) + self.__session = self.http_session() domain_parts = url.split('.') domain_suffix = None while domain_suffix is None: diff --git a/intelmq/bots/experts/ripe/expert.py b/intelmq/bots/experts/ripe/expert.py index b347dfe2e..181072975 100644 --- a/intelmq/bots/experts/ripe/expert.py +++ b/intelmq/bots/experts/ripe/expert.py @@ -15,12 +15,7 @@ import intelmq.lib.utils as utils from intelmq.lib.bot import ExpertBot from intelmq.lib.exceptions import MissingDependencyError -from intelmq.lib.mixins import CacheMixin - -try: - import requests -except ImportError: - requests = None +from intelmq.lib.mixins import CacheMixin, HttpMixin STATUS_CODE_ERROR = 'HTTP status code was {}. Possible problem at the connection endpoint or network issue.' 
@@ -41,7 +36,7 @@ def clean_geo(geo_data): return geo_data -class RIPEExpertBot(ExpertBot, CacheMixin): +class RIPEExpertBot(ExpertBot, CacheMixin, HttpMixin): """Fetch abuse contact and/or geolocation information for the source and/or destination IP addresses and/or ASNs of the events""" mode: str = "append" query_ripe_db_asn: bool = True @@ -77,9 +72,6 @@ class RIPEExpertBot(ExpertBot, CacheMixin): } def init(self): - if requests is None: - raise MissingDependencyError("requests") - self.__query = { "db_asn": self.query_ripe_db_asn, "db_ip": self.query_ripe_db_ip, @@ -87,12 +79,7 @@ def init(self): "stat_ip": self.query_ripe_stat_ip, "stat_geo": self.query_ripe_stat_geolocation, } - - self.__initialize_http_session() - - def __initialize_http_session(self): - self.set_request_parameters() - self.http_session = utils.create_request_session(self) + self.session = self.http_session() def process(self): event = self.receive_message() @@ -134,8 +121,8 @@ def __perform_cached_query(self, type, resource): else: return json.loads(cached_value) else: - response = self.http_session.get(self.QUERY[type].format(resource), - data="", timeout=self.http_timeout_sec) + response = self.session.get(self.QUERY[type].format(resource), + data="", timeout=self.http_timeout_sec) if response.status_code != 200: if type == 'db_asn' and response.status_code == 404: diff --git a/intelmq/bots/experts/splunk_saved_search/expert.py b/intelmq/bots/experts/splunk_saved_search/expert.py index 94d85a0f7..244326328 100644 --- a/intelmq/bots/experts/splunk_saved_search/expert.py +++ b/intelmq/bots/experts/splunk_saved_search/expert.py @@ -56,19 +56,13 @@ intelmq.exceptions.KeyExists. 
""" - -try: - import requests -except ImportError: - requests = None - -import intelmq.lib.utils as utils +from intelmq.lib.mixins import HttpMixin from intelmq.lib.bot import ExpertBot -from intelmq.lib.exceptions import MissingDependencyError, ConfigurationError +from intelmq.lib.exceptions import ConfigurationError import time -class SplunkSavedSearchBot(ExpertBot): +class SplunkSavedSearchBot(ExpertBot, HttpMixin): """Enrich an event from Splunk search results""" auth_token: str = None multiple_result_handling = ["warn", "use_first", "send"] @@ -83,9 +77,6 @@ class SplunkSavedSearchBot(ExpertBot): _is_multithreadable = False def init(self): - if requests is None: - raise MissingDependencyError("requests") - if self.url is None: raise ConfigurationError("Connection", "No Splunk API URL specified") if self.auth_token is None: @@ -103,11 +94,9 @@ def init(self): if "ignore" in self.multiple_result_handling and "use_first" in self.multiple_result_handling: raise ConfigurationError("Processing", "Cannot both ignore and use multiple search results") - self.set_request_parameters() - self.http_header.update({"Authorization": f"Bearer {self.auth_token}"}) - self.session = utils.create_request_session(self) + self.session = self.http_session() self.session.keep_alive = False def update_event(self, event, search_result): diff --git a/intelmq/bots/experts/tuency/expert.py b/intelmq/bots/experts/tuency/expert.py index 15521cb90..b3139f6f8 100644 --- a/intelmq/bots/experts/tuency/expert.py +++ b/intelmq/bots/experts/tuency/expert.py @@ -17,18 +17,18 @@ {"ip":{"destinations":[{"source":"portal","name":"Thurner","contacts":[{"email":"test@example.vom"}]}]},"domain":{"destinations":[{"source":"portal","name":"Thurner","contacts":[{"email":"abuse@example.at"}]}]},"suppress":true,"interval":{"unit":"immediate","length":1}} """ +from intelmq.lib.mixins import HttpMixin from intelmq.lib.bot import ExpertBot -from intelmq.lib.utils import create_request_session, parse_relative 
+from intelmq.lib.utils import parse_relative -class TuencyExpertBot(ExpertBot): +class TuencyExpertBot(ExpertBot, HttpMixin): url: str # Path to the tuency instance authentication_token: str overwrite: bool = True def init(self): - self.set_request_parameters() - self.session = create_request_session(self) + self.session = self.http_session() self.session.headers["Authorization"] = f"Bearer {self.authentication_token}" self.url = f"{self.url}intelmq/lookup" diff --git a/intelmq/bots/outputs/elasticsearch/output.py b/intelmq/bots/outputs/elasticsearch/output.py index 85cc608c0..fdf1e3fce 100644 --- a/intelmq/bots/outputs/elasticsearch/output.py +++ b/intelmq/bots/outputs/elasticsearch/output.py @@ -16,6 +16,7 @@ from intelmq.lib.bot import OutputBot from intelmq.lib.exceptions import MissingDependencyError +from intelmq.lib.mixins import HttpMixin try: from elasticsearch import Elasticsearch @@ -55,7 +56,7 @@ def get_event_date(event_dict: dict) -> datetime.date: return event_date -class ElasticsearchOutputBot(OutputBot): +class ElasticsearchOutputBot(OutputBot, HttpMixin): """Send events to an Elasticsearch database server""" elastic_host: str = '127.0.0.1' # TODO: could be ipadd elastic_index: str = 'intelmq' @@ -77,7 +78,11 @@ def init(self): if isinstance(self.flatten_fields, str): self.flatten_fields = self.flatten_fields.split(',') - self.set_request_parameters() # Not all parameters set here are used by the ES client + self.auth = None + if self.http_password and self.http_username: + self.auth = (self.http_username, self.http_password) + + self.setup() # Not all parameters set here are used by the ES client self.es = Elasticsearch([{'host': self.elastic_host, 'port': self.elastic_port}], http_auth=self.auth, diff --git a/intelmq/bots/outputs/restapi/output.py b/intelmq/bots/outputs/restapi/output.py index ef7469703..b966cbfd4 100644 --- a/intelmq/bots/outputs/restapi/output.py +++ b/intelmq/bots/outputs/restapi/output.py @@ -4,18 +4,13 @@ # -*- coding: 
utf-8 -*- from typing import Iterable +from requests import exceptions -try: - import requests -except ImportError: - requests = None - -import intelmq.lib.utils as utils +from intelmq.lib.mixins.http import HttpMixin from intelmq.lib.bot import OutputBot -from intelmq.lib.exceptions import MissingDependencyError -class RestAPIOutputBot(OutputBot): +class RestAPIOutputBot(OutputBot, HttpMixin): """Send events to a REST API listener through HTTP POST""" auth_token_name: str = None auth_token: str = None @@ -27,10 +22,7 @@ class RestAPIOutputBot(OutputBot): _auth: Iterable[str] = None def init(self): - if requests is None: - raise MissingDependencyError("requests") - - self.set_request_parameters() + self.session = self.http_session() if self.auth_token_name and self.auth_token: if self.auth_type == 'http_header': @@ -40,8 +32,6 @@ def init(self): self.auth = self.auth_token_name, self.auth_token self.http_header.update({"Content-Type": "application/json; charset=utf-8"}) - - self.session = utils.create_request_session(self) self.session.keep_alive = False def process(self): @@ -58,7 +48,7 @@ def process(self): req = self.session.post(self.host, timeout=self.http_timeout_sec, **kwargs) - except requests.exceptions.Timeout: + except exceptions.Timeout: timeoutretries += 1 if req is None and timeoutretries >= self.http_timeout_max_tries: diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index 2bfe56756..2ecccdbb3 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -76,12 +76,6 @@ class Bot: error_max_retries: int = 3 error_procedure: str = "pass" error_retry_delay: int = 15 - http_proxy: Optional[str] = None - http_timeout_max_tries: int = 3 - http_timeout_sec: int = 30 - http_user_agent: str = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" - http_verify_cert: Union[bool, str] = True - https_proxy: Optional[str] = None instances_threads: int = 0 load_balance: bool = False log_processed_messages_count: 
int = 500 @@ -852,37 +846,6 @@ def run(cls, parsed_args=None): if not instance.is_multithreaded: instance.start() - def set_request_parameters(self): - self.http_header: dict = getattr(self, 'http_header', {}) - self.http_verify_cert: bool = getattr(self, 'http_verify_cert', True) - self.ssl_client_cert: Optional[str] = getattr(self, 'ssl_client_certificate', None) - - if (hasattr(self, 'http_username') and - hasattr(self, 'http_password') and - self.http_username): - self.auth = (self.http_username, - self.http_password) - else: - self.auth = None - - if self.http_proxy and self.https_proxy: - self.proxy = {'http': self.http_proxy, - 'https': self.https_proxy} - elif self.http_proxy or self.https_proxy: - self.logger.warning('Only %s_proxy seems to be set.' - 'Both http and https proxies must be set.', - 'http' if self.http_proxy else 'https') - self.proxy = {} - else: - self.proxy = {} - - self.http_timeout_sec: Optional[int] = getattr(self, 'http_timeout_sec', None) - self.http_timeout_max_tries: int = getattr(self, 'http_timeout_max_tries', 1) - # Be sure this is always at least 1 - self.http_timeout_max_tries = self.http_timeout_max_tries if self.http_timeout_max_tries >= 1 else 1 - - self.http_header['User-agent'] = self.http_user_agent - @staticmethod def check(parameters: dict) -> Optional[List[List[str]]]: """ diff --git a/intelmq/lib/mixins/http.py b/intelmq/lib/mixins/http.py index 6a6befb96..ae58d7cde 100644 --- a/intelmq/lib/mixins/http.py +++ b/intelmq/lib/mixins/http.py @@ -7,12 +7,8 @@ `set_request_parameters` in intelmq.lib.bot.Bot """ -from intelmq.lib.exceptions import MissingDependencyError - -try: - import requests -except ImportError: - requests = None +import requests +from typing import Optional class TimeoutHTTPAdapter(requests.adapters.HTTPAdapter): @@ -41,12 +37,12 @@ class HttpMixin: http_header: dict = {} http_user_agent: str = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" 
http_verify_cert: bool = True - http_proxy = None - https_proxy = None + http_proxy: Optional[str] = None + https_proxy: Optional[str] = None http_timeout_max_tries: int = 3 http_timeout_sec: int = 30 - http_username = None - http_password = None + http_username: str = None + http_password: str = None def __init__(self, **kwargs): self.logger.debug("Running HTTP Mixin initialization.") @@ -55,9 +51,6 @@ def __init__(self, **kwargs): def setup(self): self.logger.debug("Setting up HTTP Mixin.") - if requests is None: - raise MissingDependencyError("requests") - self.__session = requests.Session() # tls settings diff --git a/intelmq/lib/pipeline.py b/intelmq/lib/pipeline.py index cf58b453c..9cb2c21b4 100644 --- a/intelmq/lib/pipeline.py +++ b/intelmq/lib/pipeline.py @@ -7,6 +7,7 @@ from itertools import chain from typing import Dict, Optional import ssl +import requests import redis @@ -18,10 +19,6 @@ try: import pika - try: - import requests - except ImportError: - requests = None except ImportError: pika = None @@ -588,9 +585,6 @@ def _get_queues(self) -> dict: auth = (self.username, self.password) else: auth = ('guest', 'guest') - if requests is None: - self.logger.error("Library 'requests' is needed to get queue status. 
Please install it.") - return {} response = requests.get(self.monitoring_url + 'api/queues', auth=auth, timeout=5) if response.status_code == 401: diff --git a/intelmq/tests/bots/collectors/github_api/test_collector.py b/intelmq/tests/bots/collectors/github_api/test_collector.py index 6b87ea617..493935803 100644 --- a/intelmq/tests/bots/collectors/github_api/test_collector.py +++ b/intelmq/tests/bots/collectors/github_api/test_collector.py @@ -6,10 +6,11 @@ """ Testing Github API Collectors """ +from cgitb import text import json import os from unittest import TestCase, main as unittest_main -from unittest.mock import MagicMock, patch +import requests_mock import intelmq.lib.exceptions as exceptions import intelmq.lib.test as test @@ -78,24 +79,7 @@ } -def print_requests_get_parameters(url, *args, **kwargs): - if 'headers' in kwargs and kwargs['headers']['Accept'] == 'application/vnd.github.v3.text-match+json': - """ - mocking of Github API requests - """ - main_mock = MagicMock() - main_mock.return_value.json = MagicMock() - main_mock.return_value = RAW_CONTENTS - main_mock.json.return_value = JSON_CONTENTS - return main_mock - else: - """ - mocking of basic GET request - """ - main_mock = MagicMock(content=EXAMPLE_CONTENT_STR) - return main_mock - - +@requests_mock.Mocker() class TestGithubContentsAPICollectorBot(test.BotTestCase, TestCase): """ A TestCase for GithubContentsAPICollectorBot. 
@@ -105,9 +89,9 @@ class TestGithubContentsAPICollectorBot(test.BotTestCase, TestCase): def set_bot(cls): cls.bot_reference = collector_github_contents_api.GithubContentsAPICollectorBot - @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') - def test_message_queue_should_contain_the_right_fields(self, requests_get_mock): - requests_get_mock.side_effect = print_requests_get_parameters + def test_message_queue_should_contain_the_right_fields(self, mocker): + mocker.get("https://api.github.com/repos/{0}/contents".format(SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG']['repository']), status_code=200, json=JSON_CONTENTS) + mocker.get("https://a_download.url/contents.txt", status_code=200, text=EXAMPLE_CONTENT_STR) self.run_bot(parameters=SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG'], prepare=True) @@ -115,27 +99,23 @@ def test_message_queue_should_contain_the_right_fields(self, requests_get_mock): for i in range(len(self.get_output_queue())): self.assertMessageEqual(i, SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['EXPECTED_REPORTS'][i]) - @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') - def test_collector_should_fail_with_bad_repository_error(self, requests_get_mock): - import requests - requests_get_mock.side_effect = requests.RequestException() + def test_collector_should_fail_with_bad_repository_error(self, mocker): + mocker.get("https://api.github.com/repos/{0}/contents".format(SHOULD_FAIL_BECAUSE_REPOSITORY_IS_NOT_VALID_CONFIG['CONFIG']['repository'])) self.allowed_error_count = 1 # allow only single and final Error to be raised self.run_bot(parameters=SHOULD_FAIL_BECAUSE_REPOSITORY_IS_NOT_VALID_CONFIG['CONFIG'], prepare=True) self.assertRegexpMatchesLog(pattern=".*Unknown repository.*") # assert the expected ValueError msg - @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') - def 
test_collector_should_fail_with_bad_credentials(self, requests_get_mock): - requests_get_mock.return_value.json = MagicMock(return_value={'message': 'Bad Credentials'}) - requests_get_mock.return_value.configure_mock(status_code=401) + def test_collector_should_fail_with_bad_credentials(self, mocker): + mocker.get("https://api.github.com/repos/{0}/contents".format(SHOULD_FAIL_WITH_BAD_CREDENTIALS['CONFIG']['repository']), status_code=401, json={'message': 'Bad Credentials'}) self.allowed_error_count = 1 self.run_bot(parameters=SHOULD_FAIL_WITH_BAD_CREDENTIALS['CONFIG'], prepare=True) self.assertRegexpMatchesLog(pattern=".*Bad Credentials.*") - @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') - def test_adding_extra_fields_should_warn(self, requests_get_mock): - requests_get_mock.side_effect = print_requests_get_parameters + def test_adding_extra_fields_should_warn(self, mocker): + mocker.get("https://api.github.com/repos/{0}/contents".format(SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG']['repository']), status_code=200, json=JSON_CONTENTS) + mocker.get("https://a_download.url/contents.txt", status_code=200, text=EXAMPLE_CONTENT_STR) custom_config = SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG'].copy() custom_config['extra_fields'] = 'aaa,bbb' @@ -153,7 +133,9 @@ def test_adding_extra_fields_should_warn(self, requests_get_mock): "raw": utils.base64_encode(EXAMPLE_CONTENT_STR) }) - def test_collector_init_should_fail_with_invalid_argument(self): + def test_collector_init_should_fail_with_invalid_argument(self, mocker): + mocker.get("https://api.github.com/repos/{0}/contents".format(SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG']['repository'])) + custom_config = SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG'].copy() config_with_wrong_regex = custom_config.copy() diff --git a/intelmq/tests/bots/experts/ripe/test_expert.py 
b/intelmq/tests/bots/experts/ripe/test_expert.py index cf8717994..430a7aeb0 100644 --- a/intelmq/tests/bots/experts/ripe/test_expert.py +++ b/intelmq/tests/bots/experts/ripe/test_expert.py @@ -70,10 +70,10 @@ "source.geolocation.city": "Lansing", "source.geolocation.latitude": 42.7348, "source.geolocation.longitude": -84.6245 - } + } INDEX_ERROR = {"__type": "Event", "source.ip": "228.66.141.189", - } + } @test.skip_internet() class TestRIPEExpertBot(test.BotTestCase, unittest.TestCase): @@ -83,7 +83,7 @@ class TestRIPEExpertBot(test.BotTestCase, unittest.TestCase): def tearDown(self): if self.bot is not None: - self.bot.http_session.close() + pass @classmethod def set_bot(cls): @@ -94,7 +94,7 @@ def set_bot(cls): 'query_ripe_stat_asn': False, 'redis_cache_db': 4, 'query_ripe_stat_geolocation': False, - } + } cls.use_cache = True def test_db_ipv4_lookup(self): @@ -128,7 +128,7 @@ def test_ripe_stat_error_json(self, mocker): 'query_ripe_stat_asn': True, 'query_ripe_stat_ip': True, 'query_ripe_stat_geolocation': False, - } + } self.input_message = EMPTY_INPUT self.prepare_bot(parameters=parameters) old = self.bot.QUERY['stat'] @@ -186,11 +186,11 @@ def test_ripe_db_ip_errors(self, mocker): """ Test RIPE DB IP for errors. 
""" self.input_message = EXAMPLE_INPUT self.prepare_bot(parameters={'query_ripe_db_asn': False, - 'query_ripe_db_ip': True, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': False, - 'query_ripe_stat_geolocation': False, - }) + 'query_ripe_db_ip': True, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': False, + 'query_ripe_stat_geolocation': False, + }) old = self.bot.QUERY['db_ip'] self.bot.QUERY['db_ip'] = 'http://localhost/{}' mocker.get('/93.184.216.34', status_code=404) @@ -211,7 +211,7 @@ def test_replace(self): 'query_ripe_stat_ip': True, 'query_ripe_stat_asn': False, 'query_ripe_stat_geolocation': False, - }) + }) self.assertMessageEqual(0, EMPTY_REPLACED) self.assertEqual(self.cache.get('stat:127.0.0.1'), b'__no_contact') self.cache.flushdb() # collides with test_ripe_stat_errors @@ -224,7 +224,7 @@ def test_ripe_db_as_404(self): 'query_ripe_stat_ip': False, 'query_ripe_stat_asn': False, 'query_ripe_stat_geolocation': False, - }) + }) self.assertMessageEqual(0, DB_404_AS) @unittest.expectedFailure @@ -234,7 +234,7 @@ def test_geolocation(self): 'query_ripe_db_ip': False, 'query_ripe_stat_ip': False, 'query_ripe_stat_asn': True, - }) + }) self.assertMessageEqual(0, GEOLOCA_OUTPUT1) @unittest.expectedFailure @@ -245,7 +245,7 @@ def test_geolocation_overwrite(self): 'query_ripe_db_ip': False, 'query_ripe_stat_ip': False, 'query_ripe_stat_asn': True, - }) + }) self.assertMessageEqual(0, GEOLOCA_OUTPUT1) @unittest.expectedFailure @@ -255,7 +255,7 @@ def test_geolocation_not_overwrite(self): 'query_ripe_db_ip': False, 'query_ripe_stat_ip': False, 'query_ripe_stat_asn': True, - }) + }) self.assertMessageEqual(0, GEOLOCA_OUTPUT3) def test_index_error(self): diff --git a/intelmq/tests/bots/outputs/restapi/test_output.py b/intelmq/tests/bots/outputs/restapi/test_output.py index e272cf347..571d31abd 100644 --- a/intelmq/tests/bots/outputs/restapi/test_output.py +++ b/intelmq/tests/bots/outputs/restapi/test_output.py @@ -22,6 +22,7 @@ def callback(request, 
context): return callback +@requests_mock.Mocker() class TestRestAPIOutputBot(test.BotTestCase, unittest.TestCase): @classmethod @@ -36,26 +37,24 @@ def set_bot(cls): cls.default_input_message = {'__type': 'Event', 'source.ip': '10.0.0.1'} - @requests_mock.Mocker() def test_event(self, mocker): """ Test if data is posted correctly to webserver. """ mocker.post('http://localhost/', text=request_callback({'source': {'ip': '10.0.0.1'}}), - request_headers={'Authorization': 'Basic dXNlcm5hbWU6cGFzc3dvcmQ=', - 'Content-Type': 'application/json; charset=utf-8'}) + headers={'Authorization': 'Basic dXNlcm5hbWU6cGFzc3dvcmQ=', + 'Content-Type': 'application/json; charset=utf-8'}) self.run_bot() - @requests_mock.Mocker() def test_status_check(self, mocker): """ Test if response from webserver is correctly validated. """ mocker.post('http://localhost/', - status_code=500, - request_headers={'Authorization': 'Basic dXNlcm5hbWU6cGFzc3dvcmQ=', - 'Content-Type': 'application/json; charset=utf-8'}) + status_code=500, + headers={'Authorization': 'Basic dXNlcm5hbWU6cGFzc3dvcmQ=', + 'Content-Type': 'application/json; charset=utf-8'}) self.run_bot(allowed_error_count=1) self.assertLogMatches('requests.exceptions.HTTPError: 500 Server Error: None for url: http://localhost/', 'ERROR')