Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Protect community from confusable homoglyphs #1470

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
21 changes: 21 additions & 0 deletions liberapay/models/community.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from postgres.orm import Model
from psycopg2 import IntegrityError
from confusable_homoglyphs import confusables

from liberapay.exceptions import CommunityAlreadyExists, InvalidCommunityName

Expand Down Expand Up @@ -38,8 +39,17 @@ def create(cls, name, creator_id, lang='mul'):
name = unicodedata.normalize('NFKC', name)
if name_re.match(name) is None:
raise InvalidCommunityName(name)

try:
with cls.db.get_cursor() as cursor:
all_names = cursor.all("""
SELECT name
FROM communities
""")
for existing_name in all_names:
if cls._unconfusable(name) == cls._unconfusable(existing_name):
raise CommunityAlreadyExists

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a very inefficient implementation, the more communities there are the slower it becomes. Also you're repeating an identical function call (cls._unconfusable(name)) in every loop iteration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed here: 9b496bc

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You only fixed the repeated call, not the wider issue: this implementation is still inefficient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which part is the most inefficient:

  • the database query?
  • the unconfusable lookup?
  • the unconfusable_string function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fetching of all the existing community names from the database and the subsequent loop. That's linear complexity, O(n), with n being the number of existing communities.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably do something like this instead:

                possible_collisions = get_confusables(name)
                possible_collisions.append(name)
                collides_with = cursor.one("""
                    SELECT name
                      FROM communities
                     WHERE name IN %s
                     LIMIT 1
                """, (tuple(possible_collisions),))
                if collides_with:
                    raise CommunityAlreadyExists(collides_with)

p_id = cursor.one("""
INSERT INTO participants
(kind, status, join_time)
Expand Down Expand Up @@ -91,3 +101,14 @@ def check_membership_status(self, participant):
@property
def nsubscribers(self):
return self.participant.nsubscribers

@staticmethod
def _unconfusable(name):
unconfusable_name = ''
for c in name:
confusable = confusables.is_confusable(c, preferred_aliases=['COMMON', 'LATIN'])
if confusable:
# if the character is confusable we replace it with the first prefered alias
c = confusable[0]['homoglyphs'][0]['c']
unconfusable_name += c
return unconfusable_name
3 changes: 3 additions & 0 deletions requirements_base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,6 @@ cryptography==2.4.2 \
asn1crypto==0.24.0 \
--hash=sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87 \
--hash=sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49
confusable_homoglyphs==3.2.0 \
--hash=sha256:3b4a0d9fa510669498820c91a0bfc0c327568cecec90648cf3819d4a6fc6a751 \
--hash=sha256:e3ce611028d882b74a5faa69e3cbb5bd4dcd9f69936da6e73d33eda42c917944
29 changes: 28 additions & 1 deletion tests/py/test_communities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import json

from liberapay.exceptions import AuthRequired
from liberapay.exceptions import (
AuthRequired,
CommunityAlreadyExists,
)
from liberapay.models.community import Community
from liberapay.testing import Harness

Expand Down Expand Up @@ -101,6 +104,30 @@ def test_join_and_leave(self):
communities = self.bob.get_communities()
assert len(communities) == 0

def test_create_community_already_taken(self):
with self.assertRaises(CommunityAlreadyExists):
Community.create('test', self.alice.id)

def test_create_community_already_taken_is_case_insensitive(self):
with self.assertRaises(CommunityAlreadyExists):
Community.create('TeSt', self.alice.id)

def test_unconfusable(self):
self.assertEqual('user2', Community._unconfusable('user2'))
self.assertEqual('alice', Community._unconfusable('alice'))
latin_string = 'AlaskaJazz'
mixed_string = 'ΑlaskaJazz'
self.assertNotEqual(latin_string, mixed_string)
self.assertEqual(latin_string, Community._unconfusable(mixed_string))

def test_create_community_already_taken_with_confusable_homoglyphs(self):
latin_string = 'AlaskaJazz'
mixed_string = 'ΑlaskaJazz'

Community.create(latin_string, self.bob.id)
with self.assertRaises(CommunityAlreadyExists):
Community.create(mixed_string, self.alice.id)


class TestCommunityEdit(Harness):

Expand Down