diff --git a/mathics_scanner/errors.py b/mathics_scanner/errors.py
index 4ef27fb..ac8c0c7 100644
--- a/mathics_scanner/errors.py
+++ b/mathics_scanner/errors.py
@@ -3,17 +3,21 @@
 
 
 class TranslateError(Exception):
-    def __init__(self):
-        pass
+    """A generic class of tokenization errors"""
+    pass
 
 
 class ScanError(TranslateError):
+    """A generic scanning error"""
     pass
 
 
 class InvalidSyntaxError(TranslateError):
+    """Invalid syntax"""
     pass
 
 
 class IncompleteSyntaxError(TranslateError):
+    """More characters were expected to form a valid token"""
     pass
+
diff --git a/mathics_scanner/feed.py b/mathics_scanner/feed.py
index 07c5251..6714b64 100644
--- a/mathics_scanner/feed.py
+++ b/mathics_scanner/feed.py
@@ -8,7 +8,16 @@
 
 
 class LineFeeder(metaclass=ABCMeta):
+    """
+    An abstract representation of a feeder. A feeder mediates the
+    consumption of characters between the tokeniser and the file being
+    scanned, and stores messages regarding tokenization errors.
+    """
     def __init__(self, filename):
+        """
+        @param: filename A string that describes the source of the feeder,
+        i.e. the name of the file being fed.
+        """
         self.messages = []
         self.lineno = 0
         self.filename = filename
@@ -29,6 +38,9 @@ def empty(self):
         return
 
     def message(self, sym, tag, *args):
+        """
+        Append a generic message of type ``sym`` to the message queue.
+        """
         if sym == "Syntax":
             message = self.syntax_message(sym, tag, *args)
         else:
@@ -36,6 +48,9 @@ def message(self, sym, tag, *args):
         self.messages.append(message)
 
     def syntax_message(self, sym, tag, *args):
+        """
+        Build a syntax error message to be appended to the message queue.
+        """
         if len(args) > 3:
             raise ValueError("Too many args.")
         message = [sym, tag]
@@ -49,6 +64,7 @@ def syntax_message(self, sym, tag, *args):
         assert len(message) == 7
         return message
 
+    # TODO: Rethink this (it is only useful for Mathics core, not anyone else)
     def send_messages(self, evaluation):
         for message in self.messages:
             evaluation.message(*message)
@@ -56,9 +72,14 @@ def send_messages(self, evaluation):
 
 
 class MultiLineFeeder(LineFeeder):
-    "Feeds one line at a time."
+    "A feeder that feeds one line at a time."
 
     def __init__(self, lines, filename=""):
+        """
+        @param: lines The source of the feeder (a string).
+        @param: filename A string that describes the source of the feeder,
+        i.e. the name of the file being fed.
+        """
         super(MultiLineFeeder, self).__init__(filename)
         self.lineno = 0
         if isinstance(lines, str):
@@ -79,9 +100,14 @@ def empty(self):
 
 
 class SingleLineFeeder(LineFeeder):
-    "Feeds all the code as a single line."
+    "A feeder that feeds all the code as a single line."
 
     def __init__(self, code, filename=""):
+        """
+        @param: code The source of the feeder (a string).
+        @param: filename A string that describes the source of the feeder,
+        i.e. the name of the file being fed.
+        """
         super().__init__(filename)
         self.code = code
         self._empty = False
@@ -98,9 +124,14 @@ def empty(self):
 
 
 class FileLineFeeder(LineFeeder):
-    "Feeds lines from an open file object"
+    "A feeder that feeds lines from an open file object"
 
     def __init__(self, fileobject, trace_fn=None):
+        """
+        @param: fileobject An open file object, the source of the feeder.
+        @param: trace_fn An optional function used to trace the lines
+        being fed.
+ """ super().__init__(fileobject.name) self.fileobject = fileobject self.lineno = 0 @@ -122,3 +153,4 @@ def feed(self): def empty(self): return self.eof + diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py index a2bbeca..958efa2 100644 --- a/mathics_scanner/tokeniser.py +++ b/mathics_scanner/tokeniser.py @@ -305,11 +305,22 @@ def compile_tokens(token_list): def is_symbol_name(text): + """ + Returns ``True`` if ``text`` is a valid identifier. Otherwise returns + ``False``. + """ + # Can't we just call match here? return full_symbol_pattern.sub("", text) == "" class Token(object): + "A representation of a Wolfram Language token" def __init__(self, tag, text, pos): + """ + @param: tag A string that indicates which type of token this is. + @param: text The actual contents of the token. + @param: pos The position of the token in the input feed. + """ self.tag = tag self.text = text self.pos = pos @@ -326,28 +337,53 @@ def __repr__(self): class Tokeniser(object): + """ + A tokeniser for the Wolfram Language. + + When subclassing ``Tokeniser``, custom tokenisation rules can be defined by + declaring methods whose names are preceded by ``t_``, such as in the + following example: :: + + class MyTokeniser(Tokeniser): + def t_MyWeirdRule(self, match): + # Your logic goes here... + pass + + In this example, ``t_MyWeirdRule`` is supposed to update the internal state + of the tokeniser and return a ``Token`` with an appropriate tag. ``m̀atch`` + is expected to be an instance of ``re.Match``. + """ modes = { "expr": (tokens, token_indices), "filename": (filename_tokens, {}), } def __init__(self, feeder): + """ + @param: feeder An instance of ``LineFeeder`` which will feed characters + to the tokeniser. + """ self.pos = 0 self.feeder = feeder self.prescanner = Prescanner(feeder) self.code = self.prescanner.scan() - self.change_mode("expr") + self._change_mode("expr") - def change_mode(self, mode): + def _change_mode(self, mode): + """ + Set the mode of the tokeniser + """ self.mode = mode self.tokens, self.token_indices = self.modes[mode] + # TODO: Rename this to something that remotetly makes sense? def incomplete(self): - "get more code from the prescanner and continue" + "Get more code from the prescanner and continue" self.prescanner.incomplete() self.code += self.prescanner.scan() def sntx_message(self, pos=None): + """Send a message to the feeder.""" if pos is None: pos = self.pos pre, post = self.code[:pos], self.code[pos:].rstrip("\n") @@ -356,9 +392,10 @@ def sntx_message(self, pos=None): else: self.feeder.message("Syntax", "sntxf", pre, post) + # TODO: Convert this to __next__ in the future? 
     def next(self):
-        "return next token"
-        self.skip_blank()
+        "Returns the next token"
+        self._skip_blank()
         if self.pos >= len(self.code):
             return Token("END", "", len(self.code))
@@ -390,8 +427,8 @@ def next(self):
         self.pos = match.end(0)
         return Token(tag, text, match.start(0))
 
-    def skip_blank(self):
-        "skip whitespace and comments"
+    def _skip_blank(self):
+        "Skip whitespace and comments"
         comment = []  # start positions of comments
         while True:
             if self.pos >= len(self.code):
@@ -417,6 +454,7 @@ def skip_blank(self):
                 break
 
     def t_String(self, match):
+        "String rule"
         start, end = self.pos, None
         self.pos += 1  # skip opening '"'
         newlines = []
@@ -444,6 +482,7 @@ def t_String(self, match):
         return Token("String", result, start)
 
     def t_Number(self, match):
+        "Number rule"
         text = match.group(0)
         pos = match.end(0)
         if self.code[pos - 1 : pos + 1] == "..":
@@ -454,21 +493,27 @@ def t_Number(self, match):
         self.pos = pos
         return Token("Number", text, match.start(0))
 
-    def token_mode(self, match, tag, mode):
+    # This is only used internally, hence the leading underscore
+    def _token_mode(self, match, tag, mode):
         "consume a token and switch mode"
         text = match.group(0)
         self.pos = match.end(0)
-        self.change_mode(mode)
+        self._change_mode(mode)
         return Token(tag, text, match.start(0))
 
     def t_Get(self, match):
-        return self.token_mode(match, "Get", "filename")
+        "Get rule"
+        return self._token_mode(match, "Get", "filename")
 
     def t_Put(self, match):
-        return self.token_mode(match, "Put", "filename")
+        "Put rule"
+        return self._token_mode(match, "Put", "filename")
 
     def t_PutAppend(self, match):
-        return self.token_mode(match, "PutAppend", "filename")
+        "PutAppend rule"
+        return self._token_mode(match, "PutAppend", "filename")
 
     def t_Filename(self, match):
-        return self.token_mode(match, "Filename", "expr")
+        "Filename rule"
+        return self._token_mode(match, "Filename", "expr")
+
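A minimal usage sketch of the interfaces documented above, for reviewers. The
import paths are assumed from the file names in this diff; the snippet is
illustrative and not part of the change: ::

    from mathics_scanner.feed import SingleLineFeeder
    from mathics_scanner.tokeniser import Tokeniser

    # Feed all of the code as a single line.
    feeder = SingleLineFeeder("x + 42", filename="<example>")
    tokeniser = Tokeniser(feeder)

    # next() returns Token objects until the "END" tag is reached.
    token = tokeniser.next()
    while token.tag != "END":
        print(token.tag, token.text, token.pos)
        token = tokeniser.next()

Tokenization problems are either queued on the feeder via
``LineFeeder.message`` or raised as the ``TranslateError`` subclasses defined
in ``errors.py``.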