Skip to content

Commit

Permalink
[Lex] Concatenate adjacent string literals
Browse files Browse the repository at this point in the history
  • Loading branch information
LuisHsu committed Apr 12, 2024
1 parent a9b0fd6 commit f346573
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 19 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ build-*/
build/
install/
.devcontainer
.DS_Store
.DS_Store
exclude
4 changes: 4 additions & 0 deletions src/include/Error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ struct Error : public Exception {
SourcePos pos;
};

/// Error reported at a source position, with its message prefixed by "syntax error:".
struct SyntaxError : public Error {
    /// @param pos position forwarded to Error
    /// @param msg detail text appended directly after the "syntax error:" prefix
    SyntaxError(SourcePos pos, std::string msg)
        : Error(pos, std::string("syntax error:") + msg)
    {}
};

} // namespace WasmVM
} // namespace WasmVM

Expand Down
3 changes: 3 additions & 0 deletions src/include/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define WVMCC_Lexer_DEF

#include <PreProcessor.hpp>
#include <deque>

namespace WasmVM {

Expand All @@ -12,6 +13,8 @@ struct Lexer {

private:
PreProcessor &pp;
std::deque<Token> buffer;
std::optional<Token> next();
};

} // namespace WasmVM
Expand Down
1 change: 1 addition & 0 deletions src/include/PreProcessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct PreProcessor {
PPToken(std::nullopt_t n = std::nullopt) : std::optional<Token>(){}
PPToken(Token&& token) : std::optional<Token>(token){}
PPToken(Token& token) : std::optional<Token>(token){}
PPToken(std::optional<Token>& token) : std::optional<Token>(token){}

template<typename T> requires TokenType::is_valid<T>::value
inline bool hold() {
Expand Down
9 changes: 6 additions & 3 deletions src/include/Token.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ bool operator==(const CharacterConstant&, const CharacterConstant&);
// Token type for a string literal.
struct StringLiteral {
// Builds the literal from its source spelling.
StringLiteral(std::string sequence);
// Spelling that was passed to the constructor.
std::string sequence;
// Contents of the literal, held in one of five standard string types.
// NOTE(review): presumably the alternative is chosen from the literal's encoding prefix — confirm against the constructor.
std::variant<std::string, std::wstring, std::u8string, std::u16string, std::u32string> value;
std::variant<std::string, std::u8string, std::wstring, std::u16string, std::u32string> value;
// Appends the other literal's `value` to this one's `value` and returns *this.
// If exactly one side holds std::string, its characters are converted one by one
// to the other side's string type; two different non-std::string types throw
// Exception::Exception. `sequence` is not modified.
StringLiteral& operator+=(StringLiteral);
};
bool operator==(const StringLiteral&, const StringLiteral&);

Expand All @@ -91,7 +92,8 @@ using Base = std::variant<
FloatingConstant,
CharacterConstant,
HeaderName,
StringLiteral
StringLiteral,
Keyword
>;

template<typename T> requires
Expand All @@ -104,7 +106,8 @@ template<typename T> requires
std::is_same_v<T, FloatingConstant> ||
std::is_same_v<T, CharacterConstant> ||
std::is_same_v<T, HeaderName> ||
std::is_same_v<T, StringLiteral>
std::is_same_v<T, StringLiteral> ||
std::is_same_v<T, Keyword>
struct is_valid {
template<typename U> requires std::is_constructible_v<T, U>
is_valid(){}
Expand Down
29 changes: 20 additions & 9 deletions src/lib/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <Compiler.hpp>

#include <PreProcessor.hpp>
#include <Lexer.hpp>
#include <iostream> // FIXME:

using namespace WasmVM;
Expand All @@ -27,17 +28,27 @@ Compiler::Compiler(std::vector<std::filesystem::path> include_paths) :

// Compile the source file at `source_path`.
// Work in progress: constructs a PreProcessor and a Lexer over it, writes the
// tokens to std::cout, and returns a default-constructed WasmModule.
WasmModule Compiler::compile(std::filesystem::path source_path){
PreProcessor pp(source_path, include_paths);
Lexer lexer(pp);
// TODO:
// Preprocessor-token dump: prints each token's position and value, followed by
// the names in `tok.expanded` when that list is not empty.
for(PreProcessor::PPToken tok = pp.get(); tok.has_value(); tok = pp.get()){
std::cout << tok->pos << " " << tok.value();
if(!tok.expanded.empty()){
std::cout << " (expand from: ";
for(std::string exp_macro : tok.expanded){
std::cout << exp_macro << " ";
}
std::cout << ")";
// Lexer-token dump: prints each token's position, a tag naming the alternative
// it holds, and then the token itself.
for(std::optional<Token> tok = lexer.get(); tok.has_value(); tok = lexer.get()){
Token& token = tok.value();
std::cout << token.pos;
// Tokens holding none of the alternatives below are printed without a tag.
if(std::holds_alternative<TokenType::Keyword>(token)){
std::cout << " [Keyword] ";
}else if(std::holds_alternative<TokenType::Identifier>(token)){
std::cout << " [Identifier] ";
}else if(std::holds_alternative<TokenType::IntegerConstant>(token)){
std::cout << " [Integer] ";
}else if(std::holds_alternative<TokenType::FloatingConstant>(token)){
std::cout << " [Float] ";
}else if(std::holds_alternative<TokenType::CharacterConstant>(token)){
std::cout << " [Character] ";
}else if(std::holds_alternative<TokenType::StringLiteral>(token)){
std::cout << " [String] ";
}else if(std::holds_alternative<TokenType::Punctuator>(token)){
std::cout << " [Punct] ";
}
std::cout << std::endl;
std::cout << token << std::endl;
}
return WasmModule(); // FIXME:
}
65 changes: 63 additions & 2 deletions src/lib/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,72 @@
// along with wvmcc. If not, see <https://www.gnu.org/licenses/>.

#include <Lexer.hpp>
#include <Error.hpp>
#include <string>

using namespace WasmVM;

Lexer::Lexer(PreProcessor &pp) : pp(pp){}

std::optional<Token> Lexer::get(){
PreProcessor::PPToken token = pp.get();
/**
 * Fetch the next token for the parser, converting preprocessing tokens into
 * their final form.
 *
 *  - An Identifier whose spelling is a keyword becomes a Keyword token.
 *  - A PPNumber of type Int becomes an IntegerConstant (read as uintmax_t when
 *    the spelling contains 'u' or 'U', as intmax_t otherwise); any other
 *    PPNumber becomes a token built from its long double value.
 *  - A StringLiteral absorbs every StringLiteral that directly follows it.
 *  - A HeaderName is rejected.
 *  - Any other token is returned unchanged.
 *
 * @return the converted token, or std::nullopt when next() yields no token
 * @throws Exception::SyntaxError for a HeaderName, or when two adjacent string
 *         literals cannot be concatenated
 */
std::optional<Token> Lexer::get(){
    std::optional<Token> token = next();
    if(!token.has_value()){
        return std::nullopt;
    }
    return std::visit<std::optional<Token>>(overloaded {
        [&token](TokenType::Identifier& id){
            if(TokenType::Keyword::is_keyword(id.sequence)){
                return Token(TokenType::Keyword(id.sequence), token->pos);
            }else{
                return token.value();
            }
        },
        [&token](TokenType::PPNumber& num){
            if(num.type == TokenType::PPNumber::Int){
                // A 'u'/'U' anywhere in the spelling marks the constant as unsigned
                if(num.sequence.find('u') != std::string::npos || num.sequence.find('U') != std::string::npos){
                    return Token(TokenType::IntegerConstant(num.get<uintmax_t>()), token->pos);
                }else{
                    return Token(TokenType::IntegerConstant(num.get<intmax_t>()), token->pos);
                }
            }else{
                return Token(num.get<long double>(), token->pos);
            }
        },
        [&](TokenType::StringLiteral& str){
            // Merge the run of string literals that follows into `str`
            std::optional<Token> next_tok;
            for(next_tok = next(); next_tok && std::holds_alternative<TokenType::StringLiteral>(next_tok.value()); next_tok = next()){
                try{
                    str += std::get<TokenType::StringLiteral>(next_tok.value());
                }catch(const Exception::Exception& e){
                    // Caught by const reference so the exception is neither copied nor sliced;
                    // re-reported at the position of the first literal
                    throw Exception::SyntaxError(token.value().pos, e.what());
                }
            }
            // The token that ended the run was already consumed: push it back for the next call
            if(next_tok){
                buffer.emplace_front(next_tok.value());
            }
            return Token(str, token->pos);
        },
        [&token](TokenType::HeaderName&){
            throw Exception::SyntaxError(token.value().pos, "header name is invalid in parser");
            // Never reached; gives the lambda a non-void return type for std::visit<R>
            return std::nullopt;
        },
        [&token](auto&){
            return token;
        }
    }, token.value());
}

std::optional<Token> Lexer::next(){
PreProcessor::PPToken token;
if(buffer.empty()){
token = pp.get();
while(token && (token.hold<TokenType::NewLine>() || token.hold<TokenType::WhiteSpace>())){
token = pp.get();
}
}else{
token = buffer.front();
buffer.pop_front();
}
return token;
}
60 changes: 56 additions & 4 deletions src/lib/Token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ std::ostream& operator<<(std::ostream& os, Token& token){
[&](TokenType::Identifier& tok){
os << tok.sequence;
},
[&](TokenType::Keyword& tok){
os << tok.value;
},
[&](TokenType::IntegerConstant& tok){
std::visit(overloaded{
[&](auto val){
Expand Down Expand Up @@ -549,6 +552,9 @@ std::string Token::str(){
[](TokenType::Identifier& tok){
return tok.sequence;
},
[](TokenType::Keyword& tok){
return tok.value;
},
[](TokenType::CharacterConstant& tok){
return tok.sequence;
},
Expand Down Expand Up @@ -652,10 +658,6 @@ TokenType::CharacterConstant::CharacterConstant(std::string sequence) : sequence
}
}

enum class StringPrefix {
none, L, u, u8, U
};

TokenType::StringLiteral::StringLiteral(std::string sequence) : sequence(sequence){
auto seq_it = sequence.begin();
int width = sizeof(char);
Expand Down Expand Up @@ -709,6 +711,56 @@ TokenType::StringLiteral::StringLiteral(std::string sequence) : sequence(sequenc
}
}

/**
 * Append the value of another string literal to this one.
 *
 *  - Both sides hold the same string type: rhs is appended as is.
 *  - This side holds std::string and rhs another type: this value is rebuilt
 *    in rhs's string type from its characters, then rhs is appended.
 *  - rhs holds std::string and this side another type: rhs's characters are
 *    appended to this value.
 *  - Two different non-std::string types: rejected.
 *
 * Only `value` is changed; `sequence` is left as it was.
 *
 * @param rhs literal whose value is appended
 * @return *this
 * @throws Exception::Exception when the two string types cannot be combined
 */
TokenType::StringLiteral& TokenType::StringLiteral::operator+=(TokenType::StringLiteral rhs){
    if(value.index() == rhs.value.index()){
        // Same string type on both sides
        std::visit([&rhs](auto& lhs_str){
            using StrT = std::decay_t<decltype(lhs_str)>;
            lhs_str += std::get<StrT>(rhs.value);
        }, value);
    }else if(std::holds_alternative<std::string>(value)){
        // Keep a copy of the narrow text: emplace() destroys the current alternative
        std::string narrow = std::get<std::string>(value);
        std::visit([&](auto& rhs_str){
            using StrT = std::decay_t<decltype(rhs_str)>;
            // rhs cannot hold std::string here, the indices differ
            if constexpr (!std::is_same_v<StrT, std::string>){
                value.emplace<StrT>(narrow.begin(), narrow.end()) += rhs_str;
            }
        }, rhs.value);
    }else if(std::holds_alternative<std::string>(rhs.value)){
        std::string& narrow = std::get<std::string>(rhs.value);
        std::visit([&narrow](auto& lhs_str){
            lhs_str.append(narrow.begin(), narrow.end());
        }, value);
    }else{
        throw Exception::Exception("unable to concate string literal with different type");
    }

    return *this;
}

TokenType::Keyword::Keyword(std::string val) : value(val){
if(!is_keyword(value)){
throw Exception::Exception("unknown keyword '" + value + "'");
Expand Down

0 comments on commit f346573

Please sign in to comment.