From fd7cd4d5c991a7a6620d432781287dd25f1520d4 Mon Sep 17 00:00:00 2001 From: Scott Kitterman Date: Tue, 2 Nov 2021 11:49:38 -0400 Subject: [PATCH] Add option to exclude private PSL names and associated test Signed-off-by: Scott Kitterman --- src/publicsuffix2/__init__.py | 10 +++++++--- tests.py | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/publicsuffix2/__init__.py b/src/publicsuffix2/__init__.py index 6c1d088..1323ea3 100644 --- a/src/publicsuffix2/__init__.py +++ b/src/publicsuffix2/__init__.py @@ -62,7 +62,7 @@ class PublicSuffixList(object): - def __init__(self, psl_file=None, idna=True): + def __init__(self, psl_file=None, idna=True, private=True): """ Read and parse a public suffix list. `psl_file` is either a file location string, or a file-like object, or an iterable of lines from a @@ -80,6 +80,7 @@ def __init__(self, psl_file=None, idna=True): :param psl_file: string or None :param idna: boolean, whether to convert file to IDNA-encoded strings + :param private: boolean, include non-ICANN private names, default=True """ # Note: we test for None as we accept empty lists as inputs if psl_file is None or isinstance(psl_file, str): @@ -91,7 +92,7 @@ def __init__(self, psl_file=None, idna=True): # a list of eTLDs with their modifiers, e.g., * self.tlds = [] - root = self._build_structure(psl, idna) + root = self._build_structure(psl, idna, private) self.root = self._simplify(root) def _find_node(self, parent, parts): @@ -161,7 +162,7 @@ def _simplify(self, node): return (node[0], dict((k, self._simplify(v)) for (k, v) in node[1].items())) - def _build_structure(self, fp, idna): + def _build_structure(self, fp, idna, private): """ Build a Trie from the public suffix list. If idna==True, idna-encode each line before building. 
@@ -180,6 +181,7 @@ def _build_structure(self, fp, idna): :param fp: pointer for the public suffix list :param idna: boolean, convert lines to idna-encoded strings + :param private: boolean, include non-ICANN private names :return: Trie """ root = [0] @@ -188,6 +190,8 @@ def _build_structure(self, fp, idna): for line in fp: line = line.strip() + if not private and line.startswith('// ===BEGIN PRIVATE'): + break if not line or line.startswith('//'): continue if idna: diff --git a/tests.py b/tests.py index 39fc60d..eda49b7 100644 --- a/tests.py +++ b/tests.py @@ -256,6 +256,10 @@ def test_PublicSuffixList_tlds_is_loaded_correctly(self): psl = publicsuffix.PublicSuffixList() assert psl.tlds + def test_get_tld_no_private(self): + psl = publicsuffix.PublicSuffixList(private=False) + # Without private, ap-northeast-1.elasticbeanstalk.com is com + assert 'com' == psl.get_tld('ap-northeast-1.elasticbeanstalk.com') class TestPublicSuffixGetSld(unittest.TestCase):