Merge pull request #47 from cjwatson/urinorm-query-unicode

Fix normalization of non-ASCII query strings on Python 2
openid · Sep 2, 2020 · afa6ada · afa6ada
2 parents d093a09 + a2cb8bc
commit afa6ada
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 2 deletions.
diff --git a/openid/test/test_urinorm.py b/openid/test/test_urinorm.py
@@ -82,6 +82,14 @@ def test_path_keep_sub_delims(self):
     def test_path_percent_decode_sub_delims(self):
         self.assertEqual(urinorm('http://example.com/foo%2B%21bar'), 'http://example.com/foo+!bar')
 
+    def test_query_encoding(self):
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Unícöde+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+        self.assertEqual(
+            urinorm('http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person'),
+            'http://example.com/?openid.sreg.fullname=Un%C3%ADc%C3%B6de+Person')
+
     def test_illegal_characters(self):
         six.assertRaisesRegex(self, ValueError, 'Illegal characters in URI', urinorm, 'http://<illegal>.com/')
 

diff --git a/openid/urinorm.py b/openid/urinorm.py
@@ -132,8 +132,14 @@ def urinorm(uri):
         path = '/'
     _check_disallowed_characters(path, 'path')
 
-    # Normalize query
-    data = parse_qsl(split_uri.query)
+    # Normalize query.  On Python 2, `urlencode` without `doseq=True`
+    # requires values to be convertible to native strings using `str()`.
+    if isinstance(split_uri.query, str):
+        # Query is already a native `str` (e.g. Python 3 text): parse as-is
+        data = parse_qsl(split_uri.query)
+    else:
+        # Query is not a native `str` (Python 2 `unicode`): encode to UTF-8 first
+        data = parse_qsl(split_uri.query.encode('utf-8'))
     query = urlencode(data)
     _check_disallowed_characters(query, 'query')