Skip to content

Commit

Permalink
FIX Recognize characters correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
Saigut committed Nov 12, 2017
1 parent 1399ca2 commit 278b737
Showing 1 changed file with 30 additions and 12 deletions.
42 changes: 30 additions & 12 deletions src/schemely/lexer/SchemeLexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ enum Tag {
S_OPERATOR,
S_NUMBER,
S_QUOTE_STRING,
S_QUOTE_CHAR,
S_CHAR,
S_LITERAL,
S_BAD_ELEMENT,

Expand Down Expand Up @@ -74,8 +74,9 @@ private static Pattern notChar2(final char c1, final char c2) {
.map((a) -> (org.jparsec.Tokens.fragment("WHITE_SPACE", Tag.S_WHITE_SPACE)));

// Operators
Pattern PT_OPERATORS = Patterns.among("()[]'`,#\\");
Parser<String> PS_OPERATORS = PT_OPERATORS.toScanner("operator").source();
// Single-character operators: parentheses, brackets, quote, backquote and comma.
Pattern PT_OPS = Patterns.among("()[]'`,");
// '#' is treated as an operator only when the next character is '(';
// peek() checks for the '(' without consuming it, so only '#' is part of the match.
Pattern PT_OP_SHARP = Patterns.isChar('#').next(Patterns.isChar('(').peek());
// Scanner for either operator form; source() yields the matched text.
Parser<String> PS_OPERATORS = Patterns.or(PT_OPS, PT_OP_SHARP).toScanner("operator").source();

Parser<?> s_operators = PS_OPERATORS
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_OPERATOR)));
Expand All @@ -92,20 +93,38 @@ private static Pattern notChar2(final char c1, final char c2) {
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_NUMBER)));

// Literals
Pattern literal_valid = Patterns
Pattern PT_LITERAL_VALID = Patterns
.or(Patterns.isChar(CharPredicates.IS_LETTER),
Patterns.isChar(CharPredicates.IS_DIGIT),
Patterns.among("!@$%^&*-+_=:|/?<>."));
Pattern P_LITERAL = literal_valid.many1();
Parser<String> LITERAL = P_LITERAL.toScanner("literal").source();
Pattern PT_LITERAL = PT_LITERAL_VALID.many1();
Parser<String> LITERAL = PT_LITERAL.toScanner("literal").source();

Parser<?> s_string = Scanners.DOUBLE_QUOTE_STRING
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_QUOTE_STRING)));
Parser<?> s_char = Scanners.SINGLE_QUOTE_CHAR
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_QUOTE_CHAR)));
Parser<?> s_literal = LITERAL
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_LITERAL)));


// Character literals: the prefix "#\" followed by a single character,
// one of the names in CHAR_NAMES, or a hex escape such as x41.

// Character names accepted after the "#\" prefix.
Terminals CHAR_NAMES = Terminals
.operators("alarm", "backspace", "delete", "esc", "linefeed", "newline", "page", "return",
"space", "tab", "vtab");
// Any one character, provided it is not followed by characters matching PT_LITERAL
// (so a longer word after the prefix is not cut down to its first character).
Pattern PT_SINGLE_CHAR = Patterns.ANY_CHAR.next(PT_LITERAL.not());
// 'x' or 'X' followed by one or more hex digits, not followed by characters matching PT_LITERAL.
Pattern PT_HEX = Patterns.sequence(Patterns.among("xX"), Patterns.among("0123456789abcdefABCDEF").many1())
.next(PT_LITERAL.not());
// The two-character prefix #\ (the backslash is escaped in the Java string).
Pattern PT_CHAR_PRE = Patterns.string("#\\");

Parser<?> s_char_pre = PT_CHAR_PRE.toScanner("char prefix");
// A character name that is not immediately followed by a literal.
Parser<?> s_char_names = CHAR_NAMES.tokenizer().next(LITERAL.not());

// Prefix + single character, and prefix + hex escape, each matched as one pattern.
Parser<?> s_char_char = Patterns.sequence(PT_CHAR_PRE, PT_SINGLE_CHAR).toScanner("char char");
Parser<?> s_char_hex = Patterns.sequence(PT_CHAR_PRE, PT_HEX).toScanner("char hex");
// Prefix followed by a character name, combined at the parser level.
Parser<?> s_char_name = Parsers.sequence(s_char_pre, s_char_names);
// Alternatives are listed as single character, then name, then hex;
// the matched source text is wrapped in a fragment tagged S_CHAR.
Parser<?> s_char = Parsers.or(s_char_char, s_char_name, s_char_hex).source()
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_CHAR)));


Parser<?> s_string = Scanners.DOUBLE_QUOTE_STRING
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_QUOTE_STRING)));

Parser<?> s_literals = Parsers.or(s_string, s_char, s_literal);


Expand All @@ -117,7 +136,6 @@ private static Pattern notChar2(final char c1, final char c2) {
.map((a) -> (org.jparsec.Tokens.fragment(a, Tag.S_KEYWORD)));



// Bad char
Parser<?> s_element = Parsers.or(s_whitespace, s_comment,
s_operators, s_numbers, s_keywords, s_literals);
Expand Down Expand Up @@ -264,7 +282,7 @@ public void advance()
type = Tokens.STRING_LITERAL;
break;

case S_QUOTE_CHAR:
case S_CHAR:
type = Tokens.CHAR_LITERAL;
break;

Expand Down

0 comments on commit 278b737

Please sign in to comment.