Skip to content

Commit 73519bc

Browse files
committed
refactor basic tokenizer
1 parent b1c63e1 commit 73519bc

File tree

1 file changed

+0
-2
lines changed

1 file changed

+0
-2
lines changed

bnlp/tokenizer/basic.py

-2
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,6 @@ class BasicTokenizer:
6565
def tokenize(self, text):
6666
"""Tokenizes a piece of text."""
6767
text = convert_to_unicode(text)
68-
# handle (.) in bangla text
6968

7069
orig_tokens = whitespace_tokenize(text)
7170
# print("original tokens: ", orig_tokens)
@@ -75,7 +74,6 @@ def tokenize(self, text):
7574

7675
# print("split tokens: ", split_tokens)
7776
output_tokens = whitespace_tokenize(" ".join(split_tokens))
78-
# get (.) back in output tokens
7977
return output_tokens
8078

8179
def _run_strip_accents(self, text):

0 commit comments

Comments
 (0)