From 6ef9a0bf068c4be258339d37a76e2c2fc5681bc9 Mon Sep 17 00:00:00 2001 From: Pablo Emilio Escobar Gaviria Date: Mon, 1 Feb 2021 20:34:12 +0000 Subject: [PATCH 1/5] Documented stuff used by Mathics --- mathics_scanner/errors.py | 8 +++-- mathics_scanner/tokeniser.py | 62 +++++++++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/mathics_scanner/errors.py b/mathics_scanner/errors.py index 4ef27fb..438ea3b 100644 --- a/mathics_scanner/errors.py +++ b/mathics_scanner/errors.py @@ -3,17 +3,21 @@ class TranslateError(Exception): - def __init__(self): - pass + """A generic class of tokenizing errors""" + pass class ScanError(TranslateError): + """A generic scanning error""" pass class InvalidSyntaxError(TranslateError): + """Invalid syntax""" pass class IncompleteSyntaxError(TranslateError): + """More characters were expected to form a valid token""" pass + diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py index a2bbeca..9717845 100644 --- a/mathics_scanner/tokeniser.py +++ b/mathics_scanner/tokeniser.py @@ -305,11 +305,22 @@ def compile_tokens(token_list): def is_symbol_name(text): + """ + Returns ``True`` if ``text`` is a valid identifier. Otherwise returns + ``False``. + """ + # Can't we just call match here? return full_symbol_pattern.sub("", text) == "" class Token(object): + "A representation of a Wolfram Language token" def __init__(self, tag, text, pos): + """ + @param: tag A string that indicates which type of token this is. + @param: text The actual contents of the token. + @param: pos The position of the token in the input feed. + """ self.tag = tag self.text = text self.pos = pos @@ -326,28 +337,54 @@ def __repr__(self): class Tokeniser(object): + """ + A tokenizer for the Wolfram Language. 
+ + When subclassing ``Tokeniser``, custom tokenisation rules can be defined by + declaring methods whose names are preceded by ``t_``, such as in the + following example: :: + + class MyTokeniser(Tokeniser): + def t_MyWeirdRule(self, match): + # Your logic goes here... + pass + + In this example, ``t_MyWeirdRule`` is supposed to update the internal state + of the tokeniser and return a ``Token`` with an appropriate tag. ``m̀atch`` + is expected to be an instance of ``re.Match``. + """ modes = { "expr": (tokens, token_indices), "filename": (filename_tokens, {}), } def __init__(self, feeder): + """ + @param: feeder An instance of ``LineFeeder`` which will feed characters + to the tokenizer. + """ self.pos = 0 self.feeder = feeder self.prescanner = Prescanner(feeder) self.code = self.prescanner.scan() self.change_mode("expr") + # TODO: Turn this into a setter in the future? def change_mode(self, mode): + """ + Set the mode of the tokenizer + """ self.mode = mode self.tokens, self.token_indices = self.modes[mode] + # TODO: Rename this to something that remotetly makes sense? def incomplete(self): - "get more code from the prescanner and continue" + "Get more code from the prescanner and continue" self.prescanner.incomplete() self.code += self.prescanner.scan() def sntx_message(self, pos=None): + """Send a message to the feeder.""" if pos is None: pos = self.pos pre, post = self.code[:pos], self.code[pos:].rstrip("\n") @@ -356,8 +393,9 @@ def sntx_message(self, pos=None): else: self.feeder.message("Syntax", "sntxf", pre, post) + # TODO: Convert this to __next__ in the future? 
def next(self): - "return next token" + "Returns the next token" self.skip_blank() if self.pos >= len(self.code): return Token("END", "", len(self.code)) @@ -391,7 +429,7 @@ def next(self): return Token(tag, text, match.start(0)) def skip_blank(self): - "skip whitespace and comments" + "Skip whitespace and comments" comment = [] # start positions of comments while True: if self.pos >= len(self.code): @@ -417,6 +455,7 @@ def skip_blank(self): break def t_String(self, match): + "``String`` tokenizer" start, end = self.pos, None self.pos += 1 # skip opening '"' newlines = [] @@ -444,6 +483,7 @@ def t_String(self, match): return Token("String", result, start) def t_Number(self, match): + "Number tag" text = match.group(0) pos = match.end(0) if self.code[pos - 1 : pos + 1] == "..": @@ -454,7 +494,8 @@ def t_Number(self, match): self.pos = pos return Token("Number", text, match.start(0)) - def token_mode(self, match, tag, mode): + # This isn't outside of here so it's considered internal + def _token_mode(self, match, tag, mode): "consume a token and switch mode" text = match.group(0) self.pos = match.end(0) @@ -462,13 +503,18 @@ def token_mode(self, match, tag, mode): return Token(tag, text, match.start(0)) def t_Get(self, match): - return self.token_mode(match, "Get", "filename") + "Get tag" + return self._token_mode(match, "Get", "filename") def t_Put(self, match): - return self.token_mode(match, "Put", "filename") + "Put tag" + return self._token_mode(match, "Put", "filename") def t_PutAppend(self, match): - return self.token_mode(match, "PutAppend", "filename") + "PutAppend tag" + return self._token_mode(match, "PutAppend", "filename") def t_Filename(self, match): - return self.token_mode(match, "Filename", "expr") + "Filename tag" + return self._token_mode(match, "Filename", "expr") + From cbfdd040a7b80bb55a65473b63720a93ad65ec22 Mon Sep 17 00:00:00 2001 From: Pablo Emilio Escobar Gaviria Date: Mon, 1 Feb 2021 21:04:22 +0000 Subject: [PATCH 2/5] Fixed typos --- 
mathics_scanner/tokeniser.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py index 9717845..c6fb01e 100644 --- a/mathics_scanner/tokeniser.py +++ b/mathics_scanner/tokeniser.py @@ -338,7 +338,7 @@ def __repr__(self): class Tokeniser(object): """ - A tokenizer for the Wolfram Language. + A tokeniser for the Wolfram Language. When subclassing ``Tokeniser``, custom tokenisation rules can be defined by declaring methods whose names are preceded by ``t_``, such as in the @@ -361,7 +361,7 @@ def t_MyWeirdRule(self, match): def __init__(self, feeder): """ @param: feeder An instance of ``LineFeeder`` which will feed characters - to the tokenizer. + to the tokeniser. """ self.pos = 0 self.feeder = feeder @@ -372,7 +372,7 @@ def __init__(self, feeder): # TODO: Turn this into a setter in the future? def change_mode(self, mode): """ - Set the mode of the tokenizer + Set the mode of the tokeniser """ self.mode = mode self.tokens, self.token_indices = self.modes[mode] @@ -455,7 +455,7 @@ def skip_blank(self): break def t_String(self, match): - "``String`` tokenizer" + "String rule" start, end = self.pos, None self.pos += 1 # skip opening '"' newlines = [] @@ -483,7 +483,7 @@ def t_String(self, match): return Token("String", result, start) def t_Number(self, match): - "Number tag" + "Number rule" text = match.group(0) pos = match.end(0) if self.code[pos - 1 : pos + 1] == "..": @@ -503,18 +503,18 @@ def _token_mode(self, match, tag, mode): return Token(tag, text, match.start(0)) def t_Get(self, match): - "Get tag" + "Get rule" return self._token_mode(match, "Get", "filename") def t_Put(self, match): - "Put tag" + "Put rule" return self._token_mode(match, "Put", "filename") def t_PutAppend(self, match): - "PutAppend tag" + "PutAppend rule" return self._token_mode(match, "PutAppend", "filename") def t_Filename(self, match): - "Filename tag" + "Filename rule" return 
self._token_mode(match, "Filename", "expr")

From 03ea0c604fedee428a624d52cbe926ab350aa336 Mon Sep 17 00:00:00 2001
From: Pablo Emilio Escobar Gaviria
Date: Tue, 2 Feb 2021 13:04:24 +0000
Subject: [PATCH 3/5] Fixed typo

---
 mathics_scanner/errors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mathics_scanner/errors.py b/mathics_scanner/errors.py
index 438ea3b..ac8c0c7 100644
--- a/mathics_scanner/errors.py
+++ b/mathics_scanner/errors.py
@@ -3,7 +3,7 @@


 class TranslateError(Exception):
-    """A generic class of tokenizing errors"""
+    """A generic class of tokenization errors"""

     pass


From 73e8a43eca50299d1aa1e6b7aedc6d2f853653fc Mon Sep 17 00:00:00 2001
From: Pablo Emilio Escobar Gaviria
Date: Tue, 2 Feb 2021 13:19:23 +0000
Subject: [PATCH 4/5] Documented the feeders

---
 mathics_scanner/feed.py | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/mathics_scanner/feed.py b/mathics_scanner/feed.py
index 07c5251..6714b64 100644
--- a/mathics_scanner/feed.py
+++ b/mathics_scanner/feed.py
@@ -8,7 +8,16 @@


 class LineFeeder(metaclass=ABCMeta):
+    """
+    An abstract representation for a feeder. The purpose of a feeder is to
+    mediate the consumption of characters between the tokeniser and the actual
+    file being scanned, as well as to store messages regarding tokenization errors.
+    """

     def __init__(self, filename):
+        """
+        @param: filename A string that describes the source of the feeder, i.e.
+        the filename that is being fed.
+        """
         self.messages = []
         self.lineno = 0
         self.filename = filename
@@ -29,6 +38,9 @@ def empty(self):
         return

     def message(self, sym, tag, *args):
+        """
+        Append a generic message of type ``sym`` to the message queue.
+        """
         if sym == "Syntax":
             message = self.syntax_message(sym, tag, *args)
         else:
@@ -36,6 +48,9 @@ def message(self, sym, tag, *args):
         self.messages.append(message)

     def syntax_message(self, sym, tag, *args):
+        """
+        Append a message concerning syntax errors to the message queue.
+        """
         if len(args) > 3:
             raise ValueError("Too many args.")
         message = [sym, tag]
@@ -49,6 +64,7 @@ def syntax_message(self, sym, tag, *args):
         assert len(message) == 7
         return message

+    # TODO: Rethink this (this is only useful for core, not anyone else)
     def send_messages(self, evaluation):
         for message in self.messages:
             evaluation.message(*message)
@@ -56,9 +72,14 @@


 class MultiLineFeeder(LineFeeder):
-    "Feeds one line at a time."
+    "A feeder that feeds one line at a time."

     def __init__(self, lines, filename=""):
+        """
+        @param: lines The source of the feeder (a string).
+        @param: filename A string that describes the source of the feeder, i.e.
+        the filename that is being fed.
+        """
         super(MultiLineFeeder, self).__init__(filename)
         self.lineno = 0
         if isinstance(lines, str):
@@ -79,9 +100,14 @@ def empty(self):


 class SingleLineFeeder(LineFeeder):
-    "Feeds all the code as a single line."
+    "A feeder that feeds all the code as a single line."

     def __init__(self, code, filename=""):
+        """
+        @param: code The source of the feeder (a string).
+        @param: filename A string that describes the source of the feeder, i.e.
+        the filename that is being fed.
+        """
         super().__init__(filename)
         self.code = code
         self._empty = False
@@ -98,9 +124,14 @@ def empty(self):


 class FileLineFeeder(LineFeeder):
-    "Feeds lines from an open file object"
+    "A feeder that feeds lines from an open ``File`` object"

     def __init__(self, fileobject, trace_fn=None):
+        """
+        @param: fileobject The source of the feeder (an open file object).
+        @param: trace_fn An optional tracing callable (NOTE(review): exact
+        semantics should be confirmed against its use in ``feed``).
+        """
         super().__init__(fileobject.name)
         self.fileobject = fileobject
         self.lineno = 0
@@ -122,3 +153,4 @@ def feed(self):

     def empty(self):
         return self.eof
+

From dbae4b3e0fe5c1a230724ce6e30cbf353e04f2ba Mon Sep 17 00:00:00 2001
From: Pablo Emilio Escobar Gaviria
Date: Tue, 2 Feb 2021 13:54:38 +0000
Subject: [PATCH 5/5] Marked additional methods of Tokeniser as private

These methods are only useful internally and are not used by core anywhere

---
 mathics_scanner/tokeniser.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index c6fb01e..958efa2 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -367,10 +367,9 @@ def __init__(self, feeder):
         self.feeder = feeder
         self.prescanner = Prescanner(feeder)
         self.code = self.prescanner.scan()
-        self.change_mode("expr")
+        self._change_mode("expr")

-    # TODO: Turn this into a setter in the future?
-    def change_mode(self, mode):
+    def _change_mode(self, mode):
         """
         Set the mode of the tokeniser
         """
@@ -396,7 +395,7 @@ def sntx_message(self, pos=None):
     # TODO: Convert this to __next__ in the future?
     def next(self):
         "Returns the next token"
-        self.skip_blank()
+        self._skip_blank()
         if self.pos >= len(self.code):
             return Token("END", "", len(self.code))
@@ -428,7 +427,7 @@ def next(self):
         self.pos = match.end(0)
         return Token(tag, text, match.start(0))

-    def skip_blank(self):
+    def _skip_blank(self):
         "Skip whitespace and comments"
         comment = []  # start positions of comments
         while True:
             if self.pos >= len(self.code):
@@ -499,7 +498,7 @@ def _token_mode(self, match, tag, mode):
         "consume a token and switch mode"
         text = match.group(0)
         self.pos = match.end(0)
-        self.change_mode(mode)
+        self._change_mode(mode)
         return Token(tag, text, match.start(0))

     def t_Get(self, match):