Skip to content

Commit

Permalink
[WIP] Lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
LuisHsu committed Apr 10, 2024
1 parent 1f64e47 commit 87333f1
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 13 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_library(cc
lib/PreProcessor/Macro.cpp
lib/PreProcessor/Expression.cpp
lib/Token.cpp
lib/Lexer.cpp
)

add_executable(wasmvm-cc
Expand Down
17 changes: 17 additions & 0 deletions src/exec/hello.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/* Sample input listing the three STDC pragmas (FP_CONTRACT, FENV_ACCESS,
 * CX_LIMITED_RANGE), each with the switches ON, OFF and DEFAULT.
 * NOTE(review): the lone `;` lines presumably exist to put an ordinary token
 * between consecutive pragmas -- confirm the intent. */
#pragma STDC FP_CONTRACT ON
;
#pragma STDC FP_CONTRACT OFF
;
#pragma STDC FP_CONTRACT DEFAULT
;
#pragma STDC FENV_ACCESS ON
;
#pragma STDC FENV_ACCESS OFF
;
#pragma STDC FENV_ACCESS DEFAULT
;
#pragma STDC CX_LIMITED_RANGE ON
;
#pragma STDC CX_LIMITED_RANGE OFF
;
#pragma STDC CX_LIMITED_RANGE DEFAULT
19 changes: 19 additions & 0 deletions src/include/Lexer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef WVMCC_Lexer_DEF
#define WVMCC_Lexer_DEF

#include <PreProcessor.hpp>

namespace WasmVM {

/// Lexer stage that sits on top of a PreProcessor and hands out Token values.
struct Lexer {
/// Binds this lexer to `pp`. Only a reference is stored, so `pp` must outlive
/// the Lexer.
Lexer(PreProcessor &pp);

/// Returns the next token wrapped in an optional.
/// NOTE(review): an empty optional presumably signals end of input -- confirm
/// once the implementation is complete.
std::optional<Token> get();

private:
/// Token source this lexer reads from (non-owning reference).
PreProcessor &pp;
};

} // namespace WasmVM

#endif
44 changes: 41 additions & 3 deletions src/include/Token.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ struct Punctuator {
return type == op.type;
};
};

/// A keyword token, identified purely by its spelling.
struct Keyword {
/// Builds a keyword from its spelling. The definition in Token.cpp throws
/// Exception::Exception when the spelling is rejected by is_keyword().
/// Not `explicit`, so a std::string converts to Keyword implicitly.
Keyword(std::string);
/// Spelling of the keyword.
std::string value;

/// Reports whether the given spelling is in the keyword table.
static bool is_keyword(std::string);

/// Two keywords compare equal when their spellings are equal.
constexpr bool operator==(const Keyword& op) const {
return value == op.value;
};
};

struct NewLine {};
bool operator==(const NewLine&, const NewLine&);

Expand All @@ -35,6 +47,8 @@ struct PPNumber {
T get();
enum {Float, Int} type;
std::string sequence;
private:
int base();
};
bool operator==(const PPNumber&, const PPNumber&);

Expand Down Expand Up @@ -64,9 +78,33 @@ struct HeaderName {
};
bool operator==(const HeaderName&, const HeaderName&);

using Base = std::variant<Punctuator, NewLine, WhiteSpace, PPNumber, Identifier, CharacterConstant, HeaderName, StringLiteral>;

template<typename T> requires std::is_same_v<T, Punctuator> || std::is_same_v<T, NewLine> || std::is_same_v<T, WhiteSpace> || std::is_same_v<T, PPNumber> || std::is_same_v<T, Identifier> || std::is_same_v<T, CharacterConstant> || std::is_same_v<T, HeaderName> || std::is_same_v<T, StringLiteral>
/// Value of an integer constant: holds either an intmax_t or a uintmax_t.
using IntegerConstant = std::variant<intmax_t, uintmax_t>;
/// Value of a floating constant, stored as long double.
using FloatingConstant = long double;

/// Variant over every token alternative listed below.
/// NOTE(review): presumably the underlying type of Token -- confirm.
using Base = std::variant<
Punctuator,
NewLine,
WhiteSpace,
PPNumber,
Identifier,
IntegerConstant,
FloatingConstant,
CharacterConstant,
HeaderName,
StringLiteral
>;

template<typename T> requires
std::is_same_v<T, Punctuator> ||
std::is_same_v<T, NewLine> ||
std::is_same_v<T, WhiteSpace> ||
std::is_same_v<T, PPNumber> ||
std::is_same_v<T, Identifier> ||
std::is_same_v<T, IntegerConstant> ||
std::is_same_v<T, FloatingConstant> ||
std::is_same_v<T, CharacterConstant> ||
std::is_same_v<T, HeaderName> ||
std::is_same_v<T, StringLiteral>
struct is_valid {
template<typename U> requires std::is_constructible_v<T, U>
is_valid(){}
Expand Down
24 changes: 24 additions & 0 deletions src/lib/Lexer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (C) 2024 Luis Hsu
//
// wvmcc is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// wvmcc is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with wvmcc. If not, see <https://www.gnu.org/licenses/>.

#include <Lexer.hpp>

using namespace WasmVM;

/// Stores a reference to the preprocessor that this lexer will read from.
Lexer::Lexer(PreProcessor &pp)
    : pp(pp)
{
}

std::optional<Token> Lexer::get(){
PreProcessor::PPToken token = pp.get();
}
65 changes: 55 additions & 10 deletions src/lib/Token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cctype>
#include <cinttypes>
#include <climits>
#include <unordered_set>

using namespace WasmVM;

Expand Down Expand Up @@ -333,6 +334,16 @@ std::ostream& operator<<(std::ostream& os, Token& token){
[&](TokenType::Identifier& tok){
os << tok.sequence;
},
[&](TokenType::IntegerConstant& tok){
std::visit(overloaded{
[&](auto val){
os << val;
}
}, tok);
},
[&](TokenType::FloatingConstant& tok){
os << tok;
},
[&](TokenType::CharacterConstant& tok){
std::visit(overloaded {
[&](int val){
Expand Down Expand Up @@ -547,36 +558,53 @@ std::string Token::str(){
[](TokenType::StringLiteral& tok){
return tok.sequence;
},
[](TokenType::IntegerConstant& tok){
std::stringstream ss;
std::visit(overloaded{
[&](auto val){
ss << val;
}
}, tok);
return ss.str();
},
[](TokenType::FloatingConstant& tok){
std::stringstream ss;
ss << tok;
return ss.str();
},
}, *this);
}

template<>
intmax_t TokenType::PPNumber::get<intmax_t>(){
int TokenType::PPNumber::base(){
int base = 10;
if(sequence.starts_with("0x") || sequence.starts_with("0X")){
base = 16;
}else if(sequence.starts_with("0")){
base = 8;
}
return std::strtoimax(sequence.c_str(), nullptr, base);
return base;
}

/// Signed specialization: converts `sequence` with std::strtoimax, using the
/// radix reported by base().
template<>
intmax_t TokenType::PPNumber::get<intmax_t>(){
    const int radix = base();
    const char* const text = sequence.c_str();
    return std::strtoimax(text, nullptr, radix);
}

template<>
uintmax_t TokenType::PPNumber::get<uintmax_t>(){
int base = 10;
if(sequence.starts_with("0x") || sequence.starts_with("0X")){
base = 16;
}else if(sequence.starts_with("0")){
base = 8;
}
return std::strtoumax(sequence.c_str(), nullptr, base);
return std::strtoumax(sequence.c_str(), nullptr, base());
}

/// double specialization: converts `sequence` with std::stod, which throws
/// std::invalid_argument or std::out_of_range on failure.
template<>
double TokenType::PPNumber::get<double>(){
    const double parsed = std::stod(sequence);
    return parsed;
}

/// long double specialization: converts `sequence` with std::stold, which
/// throws std::invalid_argument or std::out_of_range on failure.
template<>
long double TokenType::PPNumber::get<long double>(){
    const long double parsed = std::stold(sequence);
    return parsed;
}

TokenType::CharacterConstant::CharacterConstant(std::string sequence) : sequence(sequence){
auto seq_it = sequence.begin();
int width = sizeof(char);
Expand Down Expand Up @@ -681,6 +709,23 @@ TokenType::StringLiteral::StringLiteral(std::string sequence) : sequence(sequenc
}
}

/// Wraps a keyword spelling.
/// @throws Exception::Exception when is_keyword() rejects the spelling.
TokenType::Keyword::Keyword(std::string val)
    : value(val)
{
    if(is_keyword(value)){
        return;
    }
    throw Exception::Exception("unknown keyword '" + value + "'");
}

/// Reports whether `val` is spelled exactly like one of the C11 keywords.
///
/// @param val candidate spelling (case-sensitive, compared as a whole string).
/// @return true when `val` is in the keyword table, false otherwise.
bool TokenType::Keyword::is_keyword(std::string val){
    // Function-local static: the table is built once, on first call.
    static const std::unordered_set<std::string> keywords {
        "auto", "extern", "short", "while", "break", "float", "signed", "case", "for", "sizeof",
        "char", "goto", "static", "const", "if", "struct", "continue", "inline", "switch", "default",
        "int", "typedef", "_Generic", "do", "long", "union", "_Imaginary", "double", "register", "unsigned",
        "_Noreturn", "else", "restrict", "void", "_Static_assert", "enum", "return", "volatile", "_Thread_local", "_Alignas",
        // Was misspelled "_Alignaof", so the real keyword was never matched.
        "_Alignof", "_Atomic", "_Bool", "_Complex"
    };
    return keywords.contains(val);
}

/// NewLine carries no state, so any two NewLine tokens compare equal.
bool TokenType::operator==(const NewLine&, const NewLine&)
{
    return true;
}
Expand Down

0 comments on commit 87333f1

Please sign in to comment.