Skip to content

Commit

Permalink
feat(lexer): Added custom error type (#103)
Browse files Browse the repository at this point in the history
There are no tests for the `error` file because the errors are
implemented via the `thiserror` derive macro.
  • Loading branch information
ElBe-Plaq committed Feb 29, 2024
1 parent d1b0320 commit 503a789
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 24 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ publish = false
[workspace.dependencies]
annotate-snippets = "0.10.0"
log = "0.4.20"
thiserror = "1.0.57"

tools = { path = "crates/tools" }

Expand Down
1 change: 1 addition & 0 deletions crates/lexer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ publish.workspace = true
[dependencies]
annotate-snippets.workspace = true
log.workspace = true
thiserror.workspace = true
tools.workspace = true

[lints]
Expand Down
60 changes: 60 additions & 0 deletions crates/lexer/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//! Errors that may occur while lexing.
// I Language lexer errors.
// Version: 1.0.0

// Copyright (c) 2023-present I Language Development.

// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the 'Software'),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

/////////////
// IMPORTS //
/////////////

use crate::tokens::token::Location;

use thiserror::Error;


////////////
// ERRORS //
////////////

/// The different kinds of errors the lexer can raise.
/// All of these errors implement [`std::error::Error`].
///
/// The human-readable `Display` message for each variant comes from its
/// `#[error(...)]` attribute, generated by the `thiserror` derive.
#[allow(clippy::module_name_repetitions)]
#[derive(Clone, Debug, Eq, Error, Hash, Ord, PartialEq, PartialOrd)]
pub enum LexerError {
    /// An error which will be returned if a mark was invalid for some reason.
    /// This can occur when the starting character of a mark is valid, but the character after it is not.
    #[error("invalid mark at {location}")]
    InvalidMark {
        /// Where in the source the invalid mark was encountered.
        location: Location,
    },

    /// An error which will be returned if an unexpected character is encountered.
    /// This is most likely to occur when using Unicode characters, as they are not supported.
    #[error("unexpected character `{character}` at {location}")]
    UnexpectedCharacter {
        /// The character that could not be lexed.
        character: char,
        /// Where in the source the character was encountered.
        location: Location,
    },

    /// An error which will be returned if a comment is not terminated by a closing `*/`.
    #[error("unterminated comment at {location}")]
    UnterminatedComment {
        /// Where in the source the unterminated comment starts.
        location: Location,
    },

    /// An error which will be returned if a string is not terminated by a closing quote or the quote is escaped.
    #[error("unterminated string at {location}")]
    UnterminatedString {
        /// Where in the source the unterminated string starts.
        location: Location,
    },
}
25 changes: 13 additions & 12 deletions crates/lexer/src/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
// IMPORTS //
/////////////

use crate::error::LexerError;
use crate::tokens::constant::Type;
use crate::tokens::keyword::Keyword;
use crate::tokens::token::{GetToken, Location, Token, TokenType, TypeDefinition};
Expand Down Expand Up @@ -105,9 +106,8 @@ use log::trace;
/// - [`Location`]
#[inline] // Suggesting inlining due to rare calls to the function
#[allow(clippy::too_many_lines)]
// TODO (ElBe, Ranastra): Switch to custom error type
pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
let mut error: Option<String> = None;
pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, LexerError> {
let mut error: Option<LexerError> = None;
let mut result: Vec<Token> = vec![];

let mut iterator: std::iter::Peekable<std::iter::Enumerate<std::str::Chars>>;
Expand All @@ -134,12 +134,10 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
};

if character == '"' || character == '\'' {
result.push(TypeDefinition::lex_string(
&mut iterator,
line,
location,
character,
));
match TypeDefinition::lex_string(&mut iterator, line, location, character) {
Ok(value) => result.push(value),
Err(error_value) => error = Some(error_value),
};
} else if matches!(
character,
'+' | '-'
Expand Down Expand Up @@ -194,7 +192,7 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
let renderer: annotate_snippets::Renderer =
annotate_snippets::Renderer::styled();
eprintln!("{}", renderer.render(snippet));
error = Some(format!("Syntax error: Invalid mark at {location}"));
error = Some(LexerError::InvalidMark { location });
}
} else if character.is_ascii_digit() {
buffer.push(character);
Expand Down Expand Up @@ -265,7 +263,10 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {

let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
eprintln!("{}", renderer.render(snippet));
error = Some(format!("Syntax error: Unexpected character at {location}"));
error = Some(LexerError::UnexpectedCharacter {
character,
location,
});
}

trace!(
Expand All @@ -276,7 +277,7 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
}

match error {
Some(message) => Err(format!("Error during lexing (last): {message}")),
Some(error_value) => Err(error_value),
None => Ok(result),
}
}
1 change: 1 addition & 0 deletions crates/lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@
// EXPORTS //
/////////////

pub mod error;
pub mod lex;
pub mod tokens;
20 changes: 11 additions & 9 deletions crates/lexer/src/tokens/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
use core;
use std;

use crate::error::LexerError;
use crate::tokens::constant::Type;
use crate::tokens::keyword::Keyword;
use crate::tokens::mark::Mark;
Expand Down Expand Up @@ -163,7 +164,7 @@ impl TypeDefinition {
///
/// # Returns
///
/// The string as a [`Token`].
/// A result of the string as a [`Token`].
///
/// # Errors
///
Expand All @@ -182,11 +183,11 @@ impl TypeDefinition {
/// # line: 1,
/// # column: 1,
/// # };
/// assert_eq!(TypeDefinition::lex_string(&mut iterator, input, location.clone(), '\''), Token {
/// assert_eq!(TypeDefinition::lex_string(&mut iterator, input, location.clone(), '\''), Ok(Token {
/// location,
/// content: "my string".to_owned(),
/// token_type: TokenType::TypeDefinition(TypeDefinition::String)
/// });
/// }));
///
///
/// ```
Expand All @@ -197,13 +198,12 @@ impl TypeDefinition {
/// - [`TypeDefinition`]
/// - [`TypeDefinition::String`]
#[inline(always)]
// TODO: Errors
pub fn lex_string(
iterator: &mut std::iter::Peekable<std::iter::Enumerate<std::str::Chars>>,
line: &str,
location: Location,
quote_type: char,
) -> Token {
) -> Result<Token, LexerError> {
let last_character: core::cell::Cell<char> = core::cell::Cell::new('\0');
let second_to_last_character: core::cell::Cell<char> = core::cell::Cell::new('\0');
let buffer: Vec<char> = iterator
Expand Down Expand Up @@ -270,17 +270,19 @@ impl TypeDefinition {
if next_character != quote_type {
let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
eprintln!("{}", renderer.render(snippet));
return Err(LexerError::UnterminatedString { location });
}
} else {
let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
eprintln!("{}", renderer.render(snippet));
return Err(LexerError::UnterminatedString { location });
}

Token {
Ok(Token {
location,
content: buffer.iter().collect::<String>(),
token_type: TokenType::TypeDefinition(TypeDefinition::String),
}
})
}
}

Expand Down Expand Up @@ -376,7 +378,7 @@ impl TokenType {
line: &str,
location: Location,
character: char,
) -> Result<Option<Token>, String> {
) -> Result<Option<Token>, LexerError> {
let mut buffer: Vec<char> = vec![character];

if let Some(&(_, next_character)) = iterator.clone().peek() {
Expand Down Expand Up @@ -434,7 +436,7 @@ impl TokenType {

let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
eprintln!("{}", renderer.render(snippet));
return Err(format!("Syntax error: Unterminated comment at {location}"));
return Err(LexerError::UnterminatedComment { location });
}

iterator.next();
Expand Down
4 changes: 2 additions & 2 deletions crates/lexer/tests/tokens/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ mod tests {

assert_eq!(
TypeDefinition::lex_string(&mut iterator, input, location.clone(), '\''),
Token {
Ok(Token {
location,
content: "my string".to_owned(),
token_type: TokenType::TypeDefinition(TypeDefinition::String)
}
})
);
}

Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ fn main() {
let input: String = std::io::read_to_string(reader).unwrap();

let start: std::time::Instant = std::time::Instant::now();
let output: Result<Vec<lexer::tokens::token::Token>, String> =
let output: Result<Vec<lexer::tokens::token::Token>, lexer::error::LexerError> =
lexer::lex::lex(input.trim(), &file_name);
debug!(
"Lexing `{file_name}` took {}ms.",
Expand Down

0 comments on commit 503a789

Please sign in to comment.