From 6b1163e8e9203b6d2446a572bbda7d384b685c6d Mon Sep 17 00:00:00 2001 From: CohenArthur Date: Fri, 20 Oct 2023 02:26:12 +0200 Subject: [PATCH 1/4] xparser: Rewrite block parsing logic --- xparser/src/constructs.rs | 93 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 5 deletions(-) diff --git a/xparser/src/constructs.rs b/xparser/src/constructs.rs index a8f80197..9ec1e705 100644 --- a/xparser/src/constructs.rs +++ b/xparser/src/constructs.rs @@ -29,7 +29,6 @@ use ast::Value; use ast::{Ast, TypeContent}; use location::Location; use location::SpanTuple; -use nom::sequence::tuple; use nom::Err::Error as NomError; use nom::Slice; use nom_locate::position; @@ -37,7 +36,8 @@ use symbol::Symbol; use nom::{ branch::alt, bytes::complete::take, character::complete::multispace0, combinator::opt, - multi::many0, sequence::delimited, sequence::pair, sequence::preceded, sequence::terminated, + multi::many0, multi::many1, sequence::delimited, sequence::pair, sequence::preceded, + sequence::terminated, sequence::tuple, }; // FIXME: Refactor, rework, remove, depreciate @@ -777,23 +777,89 @@ fn return_type(input: ParseInput) -> ParseResult> { } } -/// block = '{' next inner_block +/// block = '{' ([expr ';' | loop | if | function])* expr? '}' pub fn block(input: ParseInput) -> ParseResult { let (input, start_loc) = position(input)?; let (input, _) = tokens::left_curly_bracket(input)?; let input = next(input); - let (input, block) = inner_block(input)?; + + // Check if the block is immediately closed and early return + if let Ok((input, _)) = tokens::right_curly_bracket(input) { + let (input, end_loc) = position(input)?; + + return Ok(( + input, + Ast { + location: pos_to_loc(input, start_loc, end_loc), + node: Node::Block { + stmts: vec![], + last_is_expr: false, + }, + }, + )); + } + + let stmt = |input| { + let (input, start_loc) = position(input)?; + + if let Ok((input, _)) = tokens::if_tok(input) { + unit_if(input, start_loc.into()) + } else if let Ok((input, kind)) = + alt((tokens::func_tok, tokens::test_tok, tokens::mock_tok))(input) + { + unit_func(input, kind, start_loc.into()) + } else if tokens::left_curly_bracket(input).is_ok() { + // we don't update `input` here so that we can just call `block` + block(input) + } else { + let (input, expr) = expr(input)?; + let input = next(input); + let (input, _) = tokens::semicolon(input)?; + + Ok((input, expr)) + } + }; + + let (input, stmts) = opt(many1(stmt))(input)?; + let (input, last_expr) = opt(expr)(input)?; + let last_is_expr = last_expr.is_some(); + let (input, end_loc) = position(input)?; + let mut stmts = stmts.unwrap_or_default(); + + if let Some(expr) = last_expr { + stmts.push(expr) + }; Ok(( input, Ast { location: pos_to_loc(input, start_loc, end_loc), - node: block, + node: Node::Block { + stmts, + last_is_expr, + }, }, )) } +// /// block = '{' next inner_block +// pub fn block(input: ParseInput) -> ParseResult { +// let (input, start_loc) = position(input)?; +// let (input, _) = tokens::left_curly_bracket(input)?; +// let input = next(input); +// let (input, block) = inner_block(input)?; +// let (input, end_loc) = position(input)?; + +// Ok(( +// input, +// Ast { +// location: pos_to_loc(input, start_loc, end_loc), +// node: block, +// }, +// )) +// } + /// inner_block = '}' /// | expr '}' (* The only case where block is an expr *) /// | expr ';' next inner_block @@ -819,6 +885,23 @@ fn inner_block(input: ParseInput) -> ParseResult { )); } + // the lines from here on out until the end of the function are problematic. when we parse + // the following: + // + // ```text + // func foo() -> ReturnValue { + // if condition {} + // + // return_value + // } + // ``` + // + // then the parser will fail, as we indicate that all expresssions or statements of a block must be preceded + // by a semicolon. this isn't great. the code which will work with the current parser thus adds an extra + // semicolon at the end of the `if` block. plus, this function keeps calling itself recursively and adding + // at the beginning of the `stmts` vector, instead of accumulating statements. + // Issue #395 + let (input, block) = preceded(tokens::semicolon, preceded(nom_next, inner_block))(input)?; let (mut stmts, last_is_expr) = match block { Node::Block { From 8f21638c58cdea08d2c9ebb698d0601ab3b6bcb4 Mon Sep 17 00:00:00 2001 From: CohenArthur Date: Sat, 21 Oct 2023 20:30:38 +0200 Subject: [PATCH 2/4] xparser: Allow inner blocks to not end with a semicolon --- xparser/src/constructs.rs | 40 ++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/xparser/src/constructs.rs b/xparser/src/constructs.rs index 9ec1e705..59287bdd 100644 --- a/xparser/src/constructs.rs +++ b/xparser/src/constructs.rs @@ -58,7 +58,7 @@ pub fn many_exprs(mut input: ParseInput) -> ParseResult> { if input.is_empty() { return Ok((input, exprs)); } - let (new_input, expr) = expr_semicolon(input)?; + let (new_input, expr) = expr_maybe_semi(input)?; input = new_input; exprs.push(expr); } @@ -67,13 +67,25 @@ pub fn many_exprs(mut input: ParseInput) -> ParseResult> { /// Parse an instruction and maybe the semicolon that follows. /// /// expr_semicolon = expr [ ';' ] -pub fn expr_semicolon(input: ParseInput) -> ParseResult { +pub fn expr_maybe_semi(input: ParseInput) -> ParseResult { let (input, expr) = expr(input)?; + let input = next(input); let (input, _) = opt(tokens::semicolon)(input)?; Ok((input, expr)) } +/// Parse an instruction and the semicolon that follows. +/// +/// expr_semicolon = expr [ ';' ] +pub fn expr_semicolon(input: ParseInput) -> ParseResult { + let (input, expr) = expr(input)?; + let input = next(input); + let (input, _) = tokens::semicolon(input)?; + + Ok((input, expr)) +} + // expr = cmp ( '<' cmp | '>' cmp | '<=' cmp | '>=' cmp | '==' cmp | '!=' cmp)* pub fn expr(input: ParseInput) -> ParseResult { let input = next(input); @@ -800,6 +812,7 @@ pub fn block(input: ParseInput) -> ParseResult { } let stmt = |input| { + let input = next(input); let (input, start_loc) = position(input)?; if let Ok((input, _)) = tokens::if_tok(input) { @@ -812,16 +825,15 @@ pub fn block(input: ParseInput) -> ParseResult { // we don't update `input` here so that we can just call `block` block(input) } else { - let (input, expr) = expr(input)?; - let input = next(input); - let (input, _) = tokens::semicolon(input)?; - - Ok((input, expr)) + expr_semicolon(input) } }; let (input, stmts) = opt(many1(stmt))(input)?; let (input, last_expr) = opt(expr)(input)?; + let input = next(input); + let (input, _) = tokens::right_curly_bracket(input)?; + let last_is_expr = last_expr.is_some(); let (input, end_loc) = position(input)?; @@ -2378,4 +2390,18 @@ mod tests { TypeKind::FunctionLike(..) )); } + + #[test] + fn block_in_block_does_not_require_extra_semicolon_395() { + let input = span!("func foo() { if true { 15 } else { 14 } }"); + + assert!(expr(input).is_ok()); + } + + #[test] + fn block_in_block_does_not_require_extra_semicolon_395_2() { + let input = span!("func foo() { { { where x = 14; } } }"); + + assert!(expr(input).is_ok()); + } } From 5c93f345f941aa356859ca5239722e66a8a29cca Mon Sep 17 00:00:00 2001 From: CohenArthur Date: Sun, 22 Oct 2023 02:43:00 +0200 Subject: [PATCH 3/4] wip: f me up --- xparser/src/constructs.rs | 53 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/xparser/src/constructs.rs b/xparser/src/constructs.rs index 59287bdd..27571072 100644 --- a/xparser/src/constructs.rs +++ b/xparser/src/constructs.rs @@ -815,18 +815,26 @@ pub fn block(input: ParseInput) -> ParseResult { let input = next(input); let (input, start_loc) = position(input)?; - if let Ok((input, _)) = tokens::if_tok(input) { - unit_if(input, start_loc.into()) + let (input, stmt) = if let Ok((input, _)) = tokens::if_tok(input) { + unit_if(input, start_loc.into())? } else if let Ok((input, kind)) = alt((tokens::func_tok, tokens::test_tok, tokens::mock_tok))(input) { - unit_func(input, kind, start_loc.into()) + unit_func(input, kind, start_loc.into())? } else if tokens::left_curly_bracket(input).is_ok() { + // TODO: This does not work to have a block as the last expression of another block + // `stmt` will parse it and return it as a statement + // we need to do something smarted like lifting the last statement to an expression if there was no semicolon + // we don't update `input` here so that we can just call `block` - block(input) + block(input)? } else { - expr_semicolon(input) - } + expr_semicolon(input)? + }; + + let (input, _) = opt(tokens::semicolon)(input)?; + + Ok((input, stmt)) }; let (input, stmts) = opt(many1(stmt))(input)?; @@ -2404,4 +2412,37 @@ mod tests { assert!(expr(input).is_ok()); } + + #[test] + fn early_return395() { + let input = span!("func halloween(b: bool) -> int { if b { return 14 }; 15 }"); + + assert!(expr(input).is_ok()); + } + + #[test] + fn inner_block_with_semi395() { + let input = span!( + "func halloween() -> int { func inner() -> int { { { { { return 14; } } } } }; 15 }" + ); + + assert!(expr(input).is_ok()) + } + + #[test] + fn inner_nested_function395() { + let input = span! {r#" + func halloween() -> string { + func inner() -> string { + { { { { + return "boo"; + } } } } + }; + + "different string" + } + "#}; + + assert!(expr(input).is_ok()) + } } From c511c804f0e60f5701caac96d1502a5ba882c7a9 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Tue, 6 Feb 2024 22:27:02 +0100 Subject: [PATCH 4/4] fire: Fix extra semicolon in ast!{} invocation causing issues --- fire/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fire/src/lib.rs b/fire/src/lib.rs index 764f6c8f..c14413a2 100644 --- a/fire/src/lib.rs +++ b/fire/src/lib.rs @@ -538,7 +538,7 @@ mod tests { { { { { return "boo"; } } } } - }; + } "different string" }