Skip to content

Add option to exclude private PSL names and associated test #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/publicsuffix2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

class PublicSuffixList(object):

def __init__(self, psl_file=None, idna=True):
def __init__(self, psl_file=None, idna=True, private=True):
"""
Read and parse a public suffix list. `psl_file` is either a file
location string, or a file-like object, or an iterable of lines from a
Expand All @@ -80,6 +80,7 @@ def __init__(self, psl_file=None, idna=True):

:param psl_file: string or None
:param idna: boolean, whether to convert file to IDNA-encoded strings
:param private: boolean, include non-ICANN private names, default=True
"""
# Note: we test for None as we accept empty lists as inputs
if psl_file is None or isinstance(psl_file, str):
Expand All @@ -91,7 +92,7 @@ def __init__(self, psl_file=None, idna=True):

# a list of eTLDs with their modifiers, e.g., *
self.tlds = []
root = self._build_structure(psl, idna)
root = self._build_structure(psl, idna, private)
self.root = self._simplify(root)

def _find_node(self, parent, parts):
Expand Down Expand Up @@ -161,7 +162,7 @@ def _simplify(self, node):

return (node[0], dict((k, self._simplify(v)) for (k, v) in node[1].items()))

def _build_structure(self, fp, idna):
def _build_structure(self, fp, idna, private):
"""
Build a Trie from the public suffix list. If idna==True, idna-encode
each line before building.
Expand All @@ -180,6 +181,7 @@ def _build_structure(self, fp, idna):

:param fp: pointer for the public suffix list
:param idna: boolean, convert lines to idna-encoded strings
:param private: boolean, whether to include non-ICANN private names
:return: Trie
"""
root = [0]
Expand All @@ -188,6 +190,8 @@ def _build_structure(self, fp, idna):

for line in fp:
line = line.strip()
if not private and line.startswith('// ===BEGIN PRIVATE'):
break
if not line or line.startswith('//'):
continue
if idna:
Expand Down
4 changes: 4 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ def test_PublicSuffixList_tlds_is_loaded_correctly(self):
psl = publicsuffix.PublicSuffixList()
assert psl.tlds

def test_get_tld_no_private(self):
psl = publicsuffix.PublicSuffixList(private=False)
# With private names excluded, the TLD of ap-northeast-1.elasticbeanstalk.com is just 'com'
assert 'com' == psl.get_tld('ap-northeast-1.elasticbeanstalk.com')

class TestPublicSuffixGetSld(unittest.TestCase):

Expand Down