From b4c71b85f533f9bef785636ee82616563ae3013e Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Mon, 28 Oct 2024 14:15:20 +0100
Subject: [PATCH 01/23] Switch to the new AST repr for parser - part I

First stab at making the parser compatible with the new AST
representation (`bytecode::ast::Ast`). This is a heavy refactoring which
required updating most of `parser::uniterm` and `parser::utils`, as well
as `grammar.lalrpop`. The current version is far from compiling; fixing
compiler errors is planned in follow-up work.
---
 core/src/bytecode/ast/builder.rs |   48 ++
 core/src/bytecode/ast/compat.rs  |   58 +-
 core/src/bytecode/ast/mod.rs     |  198 +++++-
 core/src/combine.rs              |   12 +-
 core/src/eval/merge.rs           |    2 +
 core/src/identifier.rs           |   15 +-
 core/src/parser/grammar.lalrpop  | 1018 ++++++++++++++++--------------
 core/src/parser/mod.rs           |    9 +-
 core/src/parser/uniterm.rs       |  602 +++++++++++------
 core/src/parser/utils.rs         |  650 +++++++++----------
 10 files changed, 1484 insertions(+), 1128 deletions(-)

diff --git a/core/src/bytecode/ast/builder.rs b/core/src/bytecode/ast/builder.rs
index a7d58ee02a..4bc1d928c4 100644
--- a/core/src/bytecode/ast/builder.rs
+++ b/core/src/bytecode/ast/builder.rs
@@ -309,6 +309,54 @@ impl<'ast> Record<'ast> {
     }
 }
 
+/// Multi-ary application for types implementing `Into<Ast<'ast>>`.
+#[macro_export]
+macro_rules! app {
+    ( $alloc:expr, $f:expr $(, $args:expr )+ $(,)?) => {
+        {
+            let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+];
+
+            $crate::bytecode::ast::Ast::from($alloc.app($crate::bytecode::ast::Ast::from($f), args))
+        }
+    };
+}
+
+#[macro_export]
+/// Multi-ary primitive operator application for types implementing `Into<Ast<'ast>>`.
+macro_rules! primop_app {
+    ( $alloc: expr, $op:expr $(, $args:expr )+ $(,)?) => {
+        {
+            let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+];
+            $crate::bytecode::ast::Ast::from($alloc.prim_op($op, args))
+        }
+    };
+}
+
+#[macro_export]
+/// Multi-argument function for types implementing `Into<LocIdent>` (for the identifiers) and
+/// `Into<Ast<'ast>>` (for the body).
+macro_rules! fun {
+    ( $alloc: expr, $id:expr, $body:expr $(,)?) => {
+        $crate::bytecode::ast::Ast::from(
+            $alloc.fun($crate::identifier::LocIdent::from($id), $crate::bytecode::ast::Ast::from($body))
+        )
+    };
+    ( $alloc:expr, $id1:expr, $id2:expr $(, $rest:expr )+ $(,)?) => {
+        fun!($alloc, $crate::identifier::LocIdent::from($id1), fun!($alloc, $id2, $( $rest ),+))
+    };
+}
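Editorial note (not part of the diff): a hedged sketch of how these builder macros compose. It assumes the crate context introduced by this patch (`AstAlloc`, `PrimOp`, the `builder` helpers added below) and an API that the commit message says is still in flux, so it is illustrative rather than guaranteed to compile.

```rust
// Sketch only: builds `fun x => fun y => (+) x y` with the macros above.
use nickel_lang_core::bytecode::ast::{builder, primop::PrimOp, Ast, AstAlloc};
use nickel_lang_core::{fun, primop_app};

fn plus_fn<'ast>(alloc: &'ast AstAlloc) -> Ast<'ast> {
    // `fun!` folds right, producing nested single-argument functions.
    fun!(
        alloc,
        "x",
        "y",
        // `primop_app!` converts each argument with `Ast::from` and
        // allocates a single primop application node in the arena.
        primop_app!(alloc, PrimOp::Plus, builder::var("x"), builder::var("y")),
    )
}
```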
+
+pub fn var<'ast>(id: impl Into<LocIdent>) -> Ast<'ast> {
+    Ast::from(Node::Var(id.into()))
+}
+
+pub fn enum_tag<'ast>(tag: impl Into<LocIdent>) -> Ast<'ast> {
+    Ast::from(Node::EnumVariant {
+        tag: tag.into(),
+        arg: None,
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs
index 1e47eadc53..a5d34d3961 100644
--- a/core/src/bytecode/ast/compat.rs
+++ b/core/src/bytecode/ast/compat.rs
@@ -270,30 +270,34 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
             Term::Bool(b) => Node::Bool(*b),
             Term::Num(n) => alloc.number(n.clone()),
             Term::Str(s) => alloc.string(s),
-            Term::StrChunks(chunks) => alloc.str_chunks(
+            Term::StrChunks(chunks) => alloc.string_chunks(
                 chunks
                     .iter()
                     .map(|chunk| match chunk {
-                        term::StrChunk::Literal(s) => StrChunk::Literal(s.clone()),
+                        term::StrChunk::Literal(s) => StringChunk::Literal(s.clone()),
                         term::StrChunk::Expr(expr, indent) => {
-                            StrChunk::Expr(expr.to_ast(alloc), *indent)
+                            StringChunk::Expr(expr.to_ast(alloc), *indent)
                         }
                     })
                     .rev(),
             ),
             Term::Fun(id, body) => alloc.fun(Pattern::any(*id), body.to_ast(alloc)),
             Term::FunPattern(pat, body) => alloc.fun(pat.to_ast(alloc), body.to_ast(alloc)),
-            Term::Let(bindings, body, attrs) => alloc.let_binding(
-                bindings
-                    .iter()
-                    .map(|(id, term)| (Pattern::any(*id), term.to_ast(alloc))),
+            Term::Let(bindings, body, attrs) => alloc.let_block(
+                bindings.iter().map(|(id, value)| LetBinding {
+                    pattern: Pattern::any(*id),
+                    value: value.to_ast(alloc),
+                    metadata: Default::default(),
+                }),
                 body.to_ast(alloc),
                 attrs.rec,
             ),
-            Term::LetPattern(bindings, body, attrs) => alloc.let_binding(
-                bindings
-                    .iter()
-                    .map(|(pat, term)| (pat.to_ast(alloc), term.to_ast(alloc))),
+            Term::LetPattern(bindings, body, attrs) => alloc.let_block(
+                bindings.iter().map(|(pat, value)| LetBinding {
+                    pattern: pat.to_ast(alloc),
+                    value: value.to_ast(alloc),
+                    metadata: Default::default(),
+                }),
                 body.to_ast(alloc),
                 attrs.rec,
             ),
@@ -1196,12 +1200,12 @@ impl<'ast> FromAst<Ast<'ast>> for term::Term {
             Node::Bool(b) => Term::Bool(*b),
             Node::Number(n) => Term::Num((**n).clone()),
             Node::String(s) => Term::Str((*s).into()),
-            Node::StrChunks(chunks) => {
+            Node::StringChunks(chunks) => {
                 let chunks = chunks
                     .iter()
                     .map(|chunk| match chunk {
-                        StrChunk::Literal(s) => term::StrChunk::Literal(s.clone()),
-                        StrChunk::Expr(expr, indent) => {
+                        StringChunk::Literal(s) => term::StrChunk::Literal(s.clone()),
+                        StringChunk::Expr(expr, indent) => {
                            term::StrChunk::Expr(expr.to_mainline(), *indent)
                         }
                     })
@@ -1222,10 +1226,16 @@
                // a simpler / more compact `Let`.
                let try_bindings = bindings
                    .iter()
-                    .map(|(pat, term)| match pat.data {
-                        PatternData::Any(id) => Some((id, term.to_mainline())),
-                        _ => None,
-                    })
+                    .map(
+                        |LetBinding {
+                             pattern,
+                             metadata: _,
+                             value,
+                         }| match pattern.data {
+                            PatternData::Any(id) => Some((id, value.to_mainline())),
+                            _ => None,
+                        },
+                    )
                    .collect::<Option<Vec<_>>>();

                let body = body.to_mainline();
@@ -1239,13 +1249,21 @@
                } else {
                    let bindings = bindings
                        .iter()
-                        .map(|(pat, term)| (pat.to_mainline(), term.to_mainline()))
+                        .map(
+                            |LetBinding {
+                                 pattern,
+                                 value,
+                                 metadata: _,
+                             }| {
+                                (pattern.to_mainline(), value.to_mainline())
+                            },
+                        )
                        .collect();

                    Term::LetPattern(bindings, body, attrs)
                }
            }
-            Node::App { fun, args } => {
+            Node::App { head: fun, args } => {
                // unwrap(): the position of Ast should always be set (we might move to `RawSpan`
                // instead of `TermPos` soon)
                let fun_span = fun.pos.unwrap();
diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs
index ca360b2bb9..8647ff9ae6 100644
--- a/core/src/bytecode/ast/mod.rs
+++ b/core/src/bytecode/ast/mod.rs
@@ -26,7 +26,7 @@ use crate::{
 };
 
 // For now, we reuse those types from the term module.
-pub use crate::term::{Number, StrChunk};
+pub use crate::term::{MergePriority, Number, StrChunk as StringChunk};
 
 use bumpalo::Bump;
@@ -42,16 +42,6 @@
 use pattern::*;
 use primop::PrimOp;
 use typ::*;
 
-/// A Nickel AST. Contains a root node and a span.
-///
-//TODO: we don't expect to access the span much on the happy path. Should we add an indirection
-//through a reference?
-#[derive(Clone, Debug, PartialEq)]
-pub struct Ast<'ast> {
-    node: Node<'ast>,
-    pos: TermPos,
-}
-
 /// A node of the Nickel AST.
 ///
 /// Nodes are built by the parser and then mostly traversed immutably. Such nodes are optimized for
@@ -85,7 +75,7 @@ pub enum Node<'ast> {
     ///
     /// As opposed to [crate::term::Term::StrChunks], the chunks are stored in the original order:
     /// `"hello%{var}"` will give `["hello", var]`.
-    StrChunks(&'ast [StrChunk<Ast<'ast>>]),
+    StringChunks(&'ast [StringChunk<Ast<'ast>>]),
 
     /// A function.
     Fun {
         arg: &'ast Pattern<'ast>,
         body: &'ast Ast<'ast>,
     },
 
-    /// A let-binding.
+    /// A let block.
     Let {
-        bindings: &'ast [(Pattern<'ast>, Ast<'ast>)],
+        bindings: &'ast [LetBinding<'ast>],
         body: &'ast Ast<'ast>,
         rec: bool,
     },
 
     /// An application to one or more arguments.
     App {
-        fun: &'ast Ast<'ast>,
+        head: &'ast Ast<'ast>,
         args: &'ast [Ast<'ast>],
     },
@@ -163,6 +153,95 @@ pub enum Node<'ast> {
     ParseError(&'ast ParseError),
 }
 
+/// An individual binding in a let block.
+#[derive(Debug, Clone, PartialEq)]
+pub struct LetBinding<'ast> {
+    pub pattern: Pattern<'ast>,
+    pub metadata: LetMetadata<'ast>,
+    pub value: Ast<'ast>,
+}
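Editorial note (not part of the diff): a hedged sketch of what one of these `LetBinding`s looks like when built by hand, the new-AST counterpart of a plain `x = 1` binding. It assumes the crate context (in particular that `LocIdent::new` and a `From` instance for `Number` exist as in mainline).

```rust
// Sketch only: the `LetBinding` for `let x = 1 in ...`.
fn binding_x_equals_one<'ast>(alloc: &'ast AstAlloc) -> LetBinding<'ast> {
    LetBinding {
        // A bare identifier is the trivial pattern, as in `Pattern::any(*id)` above.
        pattern: Pattern::any(LocIdent::new("x")),
        // No doc and no annotation: `LetMetadata::default()`.
        metadata: Default::default(),
        // `spanned` (added below) attaches a position, here none.
        value: alloc.number(Number::from(1)).spanned(TermPos::None),
    }
}
```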
+
+/// The metadata that can be attached to a let. It's a subset of [record::FieldMetadata].
+#[derive(Debug, Default, Clone, PartialEq)]
+pub struct LetMetadata<'ast> {
+    pub doc: Option<Rc<str>>,
+    pub annotation: Annotation<'ast>,
+}
+
+impl<'ast> From<LetMetadata<'ast>> for record::FieldMetadata<'ast> {
+    fn from(let_metadata: LetMetadata<'ast>) -> Self {
+        record::FieldMetadata {
+            annotation: let_metadata.annotation,
+            doc: let_metadata.doc,
+            ..Default::default()
+        }
+    }
+}
+
+impl<'ast> TryFrom<record::FieldMetadata<'ast>> for LetMetadata<'ast> {
+    type Error = ();
+
+    fn try_from(field_metadata: record::FieldMetadata<'ast>) -> Result<Self, Self::Error> {
+        if let record::FieldMetadata {
+            doc,
+            annotation,
+            opt: false,
+            not_exported: false,
+            priority: MergePriority::Neutral,
+        } = field_metadata
+        {
+            Ok(LetMetadata { doc, annotation })
+        } else {
+            Err(())
+        }
+    }
+}
+
+impl<'ast> Node<'ast> {
+    /// Tries to extract a static literal from string chunks.
+    ///
+    /// This method returns `Some(..)` when the node is a [Node::StringChunks] and all the
+    /// chunks are [StringChunk::Literal].
+    pub fn try_str_chunk_as_static_str(&self) -> Option<String> {
+        match self {
+            Node::StringChunks(chunks) => {
+                chunks
+                    .iter()
+                    .try_fold(String::new(), |mut acc, next| match next {
+                        StringChunk::Literal(lit) => {
+                            acc.push_str(lit);
+                            Some(acc)
+                        }
+                        _ => None,
+                    })
+            }
+            _ => None,
+        }
+    }
+
+    /// Attaches a position to this node, turning it into an [Ast].
+    pub fn spanned(self, pos: TermPos) -> Ast<'ast> {
+        Ast { node: self, pos }
+    }
+}
+
+/// A Nickel AST. Contains a root node and a span.
+///
+//TODO: we don't expect to access the span much on the happy path. Should we add an indirection
+//through a reference?
+#[derive(Clone, Debug, PartialEq)]
+pub struct Ast<'ast> {
+    pub node: Node<'ast>,
+    pub pos: TermPos,
+}
+
+impl<'ast> Ast<'ast> {
+    /// Sets a new position for this AST node.
+    pub fn with_pos(self, pos: TermPos) -> Self {
+        Ast { pos, ..self }
+    }
+}
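Editorial note (not part of the diff): the `TryFrom` above encodes the interesting direction of the conversion. A hedged sketch of the intended contract, assuming the crate context:

```rust
// Sketch only: field metadata demotes to let metadata exactly when it
// carries nothing field-specific (no `opt`, no `not_exported`, and a
// neutral merge priority), per the `if let` pattern above.
fn demote<'ast>(meta: record::FieldMetadata<'ast>) -> Option<LetMetadata<'ast>> {
    LetMetadata::try_from(meta).ok()
}
```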
 
 /// A branch of a match expression.
 #[derive(Debug, PartialEq, Clone)]
 pub struct MatchBranch<'ast> {
@@ -296,16 +375,24 @@ impl AstAlloc {
         Node::Number(self.number_arena.alloc(number))
     }
 
+    pub fn number_move(&self, number: Number) -> &'_ Number {
+        self.number_arena.alloc(number)
+    }
+
     pub fn string<'ast>(&'ast self, s: &str) -> Node<'ast> {
         Node::String(self.generic_arena.alloc_str(s))
     }
 
-    pub fn str_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast>
+    pub fn string_move<'ast>(&'ast self, s: &str) -> &'_ str {
+        self.generic_arena.alloc_str(s)
+    }
+
+    pub fn string_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast>
     where
-        I: IntoIterator<Item = StrChunk<Ast<'ast>>>,
+        I: IntoIterator<Item = StringChunk<Ast<'ast>>>,
         I::IntoIter: ExactSizeIterator,
     {
-        Node::StrChunks(self.generic_arena.alloc_slice_fill_iter(chunks))
+        Node::StringChunks(self.generic_arena.alloc_slice_fill_iter(chunks))
     }
 
     pub fn fun<'ast>(&'ast self, pat: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> {
         let arg = self.generic_arena.alloc(pat);
         let body = self.generic_arena.alloc(body);
         Node::Fun { arg, body }
     }
 
-    pub fn let_binding<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast>
+    pub fn nary_fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast>
+    where
+        I: IntoIterator<Item = Pattern<'ast>>,
+        I::IntoIter: DoubleEndedIterator,
+    {
+        args.into_iter()
+            .rev()
+            .fold(body, |body, arg| Ast {
+                node: self.fun(arg, body),
+                pos: TermPos::None,
+            })
+            .node
+    }
+
+    pub fn let_block<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast>
     where
-        I: IntoIterator<Item = (Pattern<'ast>, Ast<'ast>)>,
+        I: IntoIterator<Item = LetBinding<'ast>>,
         I::IntoIter: ExactSizeIterator,
     {
         let bindings = self.generic_arena.alloc_slice_fill_iter(bindings);
         let body = self.generic_arena.alloc(body);
 
         Node::Let {
             bindings,
             body,
             rec,
         }
     }
 
-    pub fn app<'ast, I>(&'ast self, fun: Ast<'ast>, args: I) -> Node<'ast>
+    pub fn app<'ast, I>(&'ast self, head: Ast<'ast>, args: I) -> Node<'ast>
     where
         I: IntoIterator<Item = Ast<'ast>>,
         I::IntoIter: ExactSizeIterator,
     {
         Node::App {
-            fun: self.generic_arena.alloc(fun),
+            head: self.generic_arena.alloc(head),
             args: self.generic_arena.alloc_slice_fill_iter(args),
         }
     }
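Editorial note (not part of the diff): `nary_fun` is just a right fold over the argument patterns. A hedged sketch of the equivalence, assuming the crate context:

```rust
// Sketch only: `nary_fun([p1, p2], body)` builds `fun p1 => (fun p2 => body)`.
// Inner lambdas get `TermPos::None`; only the outermost node carries a span.
fn curried<'ast>(
    alloc: &'ast AstAlloc,
    p1: Pattern<'ast>,
    p2: Pattern<'ast>,
    body: Ast<'ast>,
) -> Node<'ast> {
    // Equivalent to: alloc.fun(p1, alloc.fun(p2, body).spanned(TermPos::None))
    alloc.nary_fun([p1, p2], body)
}
```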
@@ -437,14 +538,20 @@
         Node::Import(Import::Package { id })
     }
 
-    /// As opposed to [Self::typ], this method takes an already constructed type and move it into
-    /// the arena, instead of taking each constituent separately.
     pub fn typ<'ast>(&'ast self, typ: Type<'ast>) -> Node<'ast> {
         Node::Type(self.generic_arena.alloc(typ))
     }
 
-    pub fn typ_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> {
-        Node::Type(self.generic_arena.alloc(Type { typ, pos }))
+    pub fn type_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> {
+        Node::Type(self.type_move(Type { typ, pos }))
+    }
+
+    pub fn type_data<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> &'ast Type<'ast> {
+        self.type_move(Type { typ, pos })
+    }
+
+    pub fn type_move<'ast>(&'ast self, typ: Type<'ast>) -> &'ast Type<'ast> {
+        self.generic_arena.alloc(typ)
     }
 
     pub fn types<'ast, I>(&'ast self, types: I) -> &'ast [Type<'ast>]
     where
         I: IntoIterator<Item = Type<'ast>>,
         I::IntoIter: ExactSizeIterator,
     {
         self.generic_arena.alloc_slice_fill_iter(types)
     }
 
     pub fn enum_rows<'ast>(&'ast self, erows: EnumRowsUnr<'ast>) -> &'ast EnumRows<'ast> {
         self.generic_arena.alloc(EnumRows(erows))
     }
 
+    pub fn enum_rows_move<'ast>(&'ast self, erows: EnumRows<'ast>) -> &'ast EnumRows<'ast> {
+        self.generic_arena.alloc(erows)
+    }
+
     pub fn record_rows<'ast>(&'ast self, rrows: RecordRowsUnr<'ast>) -> &'ast RecordRows<'ast> {
         self.generic_arena.alloc(RecordRows(rrows))
     }
 
+    pub fn record_rows_move<'ast>(&'ast self, rrows: RecordRows<'ast>) -> &'ast RecordRows<'ast> {
+        self.generic_arena.alloc(rrows)
+    }
+
+    pub fn record_row<'ast>(&'ast self, id: LocIdent, typ: Type<'ast>) -> &'ast RecordRow<'ast> {
+        self.generic_arena.alloc(RecordRow {
+            id,
+            typ: self.generic_arena.alloc(typ),
+        })
+    }
+
     pub fn parse_error(&self, error: ParseError) -> Node<'_> {
         Node::ParseError(self.error_arena.alloc(error))
     }
 
     pub fn pattern<'ast>(&'ast self, pattern: Pattern<'ast>) -> &'ast Pattern<'ast> {
         self.generic_arena.alloc(pattern)
     }
 
+    pub fn patterns<'ast, I>(&'ast self, patterns: I) -> &'ast [Pattern<'ast>]
+    where
+        I: IntoIterator<Item = Pattern<'ast>>,
+        I::IntoIter: ExactSizeIterator,
+    {
+        self.generic_arena.alloc_slice_fill_iter(patterns)
+    }
+
     pub fn enum_pattern<'ast>(
         &'ast self,
         enum_pattern: EnumPattern<'ast>,
     ) -> &'ast EnumPattern<'ast> {
         self.generic_arena.alloc(enum_pattern)
     }
 
     pub fn field_pattern<'ast>(&'ast self, field_pat: FieldPattern<'ast>) -> &'ast FieldPattern<'ast> {
         self.generic_arena.alloc(field_pat)
     }
 
+    pub fn field_patterns<'ast, I>(&'ast self, field_pats: I) -> &'ast [FieldPattern<'ast>]
+    where
+        I: IntoIterator<Item = FieldPattern<'ast>>,
+        I::IntoIter: ExactSizeIterator,
+    {
+        self.generic_arena.alloc_slice_fill_iter(field_pats)
+    }
+
     pub fn record_pattern<'ast, I>(
         &'ast self,
         patterns: I,
         tail: TailPattern,
         pos: TermPos,
     ) -> &'ast RecordPattern<'ast>
     where
         I: IntoIterator<Item = FieldPattern<'ast>>,
         I::IntoIter: ExactSizeIterator,
     {
-        let patterns = self.generic_arena.alloc_slice_fill_iter(patterns);
-
         self.generic_arena.alloc(RecordPattern {
-            patterns,
+            patterns: self.field_patterns(patterns),
             tail,
             pos,
         })
     }
 
     pub fn array_pattern<'ast, I>(
         &'ast self,
         patterns: I,
         tail: TailPattern,
         pos: TermPos,
     ) -> &'ast ArrayPattern<'ast>
     where
         I: IntoIterator<Item = Pattern<'ast>>,
         I::IntoIter: ExactSizeIterator,
     {
-        let patterns = self.generic_arena.alloc_slice_fill_iter(patterns);
-
         self.generic_arena.alloc(ArrayPattern {
-            patterns,
+            patterns: self.patterns(patterns),
             tail,
             pos,
         })
diff --git a/core/src/combine.rs b/core/src/combine.rs
index 45950e4d86..a47381a821 100644
--- a/core/src/combine.rs
+++ b/core/src/combine.rs
@@ -1,6 +1,8 @@
 //! Module for the Combine trait
 //!
-//! Defines the `Combine` trait.
+//! Defines `Combine` traits.
+
+use crate::bytecode::ast::AstAlloc;
 
 /// Trait for structures representing a series of annotations that can be combined (flattened).
 /// Pedantically, `Combine` is just a monoid.
 pub trait Combine: Default {
     /// Combine two elements.
     fn combine(left: Self, right: Self) -> Self;
 }
+
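Editorial note (not part of the diff): a hedged sketch of why the new trait below needs the allocator, using a toy annotation type as a stand-in for the real `Annotation`. Only `AstAlloc::types` (added above in this patch) is assumed; `Type: Clone` is an assumption.

```rust
// Sketch only: merging two arena-allocated contract lists requires
// allocating the combined slice in the arena, which plain `Combine`
// has no way to express.
struct ToyAnnot<'ast> {
    contracts: &'ast [Type<'ast>],
}

impl<'ast> CombineAlloc<'ast> for ToyAnnot<'ast> {
    fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self {
        // `AstAlloc::types` copies the chained contracts into the arena.
        let contracts = alloc.types(left.contracts.iter().chain(right.contracts).cloned());
        ToyAnnot { contracts }
    }
}
```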
+/// [combine::Combine] doesn't work for the new AST nodes, which require an external allocator to
+/// create new nodes. This trait is a version that takes this additional allocator. It's temporary:
+/// I suspect we won't need the original general `Combine` trait once we move to the bytecode VM,
+/// as [crate::combine::Combine] is used mostly on AST-like data.
+pub trait CombineAlloc<'ast> {
+    fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self;
+}
diff --git a/core/src/eval/merge.rs b/core/src/eval/merge.rs
index d2336015c5..a461ad0ab3 100644
--- a/core/src/eval/merge.rs
+++ b/core/src/eval/merge.rs
@@ -456,6 +456,8 @@ fn merge_fields<'a, C: Cache, I: DoubleEndedIterator + Clon
 /// This function is parametrized temporarily to accommodate both the mainline Nickel AST
 /// ([crate::term::Term]) where documentation is represented as a `String`, and the new bytecode
 /// AST where documentation is represented as an `Rc<str>`.
+//FIXME: remove the type parameter `D` once we've moved evaluation to the new bytecode VM.
+//Currently we need to handle both the old representation `D=String` and the new one `D=Rc<str>`.
 pub(crate) fn merge_doc<D>(doc1: Option<D>, doc2: Option<D>) -> Option<D> {
     //FIXME: how to merge documentation? Just concatenate?
     doc1.or(doc2)
diff --git a/core/src/identifier.rs b/core/src/identifier.rs
index c44a3b74c7..84df011d7d 100644
--- a/core/src/identifier.rs
+++ b/core/src/identifier.rs
@@ -41,6 +41,15 @@ impl Ident {
         increment!("Ident::fresh");
         Self::new(format!("{}{}", GEN_PREFIX, GeneratedCounter::next()))
     }
+
+    /// Attaches a position to this identifier, making it a `LocIdent`.
+    pub fn spanned(self, pos: TermPos) -> LocIdent {
+        LocIdent {
+            ident: self,
+            pos,
+            generated: self.label().starts_with(GEN_PREFIX),
+        }
+    }
 }
 
 impl fmt::Display for Ident {
@@ -57,11 +66,7 @@ impl fmt::Debug for Ident {
 
 impl From<Ident> for LocIdent {
     fn from(ident: Ident) -> Self {
-        LocIdent {
-            ident,
-            pos: TermPos::None,
-            generated: ident.label().starts_with(GEN_PREFIX),
-        }
+        ident.spanned(TermPos::None)
     }
 }
diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop
index d8a9495a69..423715250f 100644
--- a/core/src/parser/grammar.lalrpop
+++ b/core/src/parser/grammar.lalrpop
@@ -17,9 +17,10 @@
 //! This is not the case of all rules. Record literals and variables can both be
 //! interpreted in a different way, depending on their usage. In
 //! `x : {foo : Num}`, `{foo : Num}` is interpreted as a record type. In `{foo :
-//! Num}.foo`, it is a record literal with a missing definition for `foo`. The
-//! first interpretation is **not** equivalent to first interpreting it as a
-//! term, and then as a type.
+//! Num}.foo`, it is a record literal with a missing definition for `foo` (note:
+//! this latter form is now forbidden in the syntax). The first interpretation
+//! is **not** equivalent to first interpreting it as a term, and then as a
+//! type.
 //!
 //! For those reasons, the `uniterm` module introduces a new AST definition, that
 //! just wraps `RichTerm` and `Type`, together with dedicated variants for the
@@ -30,13 +31,15 @@
 //! possibly wrapped as a `UniTerm`).
 //!
 //! In consequence, this grammar uses three main types `RichTerm`, `Type` and
-//! `UniTerm`, as well as conversion macros `AsTerm`, `AsType` and `AsUniTerm`.
-//! Some rules that are known to only produce `RichTerm` or `Type` may have the
+//! `UniTerm`, as well as conversion macros `AsExpr`, `AsType` and `AsUniTerm`.
+//!
+//! Rules that are known to only produce `RichTerm` or `Type` may have the
 //! corresponding more precise return type. Other rules that produce or just
 //! propagate general uniterms have to return a `UniTerm`.
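Editorial note (not part of the diff): a simplified sketch of the uniterm idea that the header above leans on. The real definitions live in `parser::uniterm` (reworked by this patch) and differ in detail; the variant set here is illustrative.

```rust
// Sketch only: a parsed fragment stays convertible to both worlds until the
// grammar context (e.g. the `AsExpr`/`AsType` rules) commits to one of them.
enum UniTermSketch<'ast> {
    Var(LocIdent),           // `x` can be a term variable or a type variable
    Record(UniRecord<'ast>), // `{foo : Num}` can be a literal or a record type
    Term(Ast<'ast>),         // already committed to the expression world
    Type(Type<'ast>),        // already committed to the type world
}
```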
use std::{ ffi::OsString, convert::TryFrom, + iter, }; use lalrpop_util::ErrorRecovery; @@ -51,84 +54,115 @@ use super::{ use crate::{ files::FileId, - mk_app, - mk_opn, - mk_fun, - identifier::LocIdent, - term::{ + identifier::{Ident, LocIdent}, + combine::CombineAlloc, + bytecode::ast::{ *, - record::{RecordAttrs, Field, FieldMetadata}, - array::Array, - make as mk_term, + record::{Field, FieldMetadata}, pattern::*, + typ::*, + primop::{PrimOp, RecordOpKind}, }, - typ::*, + typ::{VarKind, DictTypeFlavour}, position::{TermPos, RawSpan}, - label::Label, - combine::Combine, + app, + primop_app, + fun, + label::MergeKind, }; use malachite::num::basic::traits::Zero; -grammar<'input, 'err, 'wcard>( +grammar<'input, 'ast, 'err, 'wcard>( + alloc: &'ast AstAlloc, src_id: FileId, errors: &'err mut Vec, ParseError>>, next_wildcard_id: &'wcard mut usize, ); -WithPos: Rule = => t.with_pos(mk_pos(src_id, l, r)); +// Takes a rule producing a `Node` and automatically attach a position to make it +// an `Ast`. +Spanned: Ast<'ast> = => + node.spanned(mk_pos(src_id, left, right)); + +// Takes a rule producing a `Node` and automatically attach a position to make it +// an `Ast`. +SpannedId: LocIdent = => + id.spanned(mk_pos(src_id, left, right)); + +// Takes a rule producing a `Node` and automatically attach a position to make it +// an `Ast`. +SpannedTy: Type<'ast> = => + ty.spanned(mk_pos(src_id, left, right)); -AsTerm: RichTerm = > =>? - RichTerm::try_from(ut) +// Takes a rule producing a spanned value with a `with_pos` method (can be an +// `Ast`, but not only) and re-assigns the position to the span of the rule. +WithPos: Rule = => + t.with_pos(mk_pos(src_id, left, right)); + +AsExpr: Ast<'ast> = > =>? + Ast::try_from(ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); -AsType: Type = > =>? +AsType: Type<'ast> = > =>? Type::try_from(ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); -AsUniTerm: UniTerm = > => UniTerm::from(ut); +// Repeat a rule zero times or more with a separator interspersed, such that the last +// separator is optional: for example, Delimiter will both accept +// `1,2` and `1,2,`. +RepeatSep: Vec = Sep)*> Sep? => { + elems.push(last); + elems +}; -AnnotSeries: AnnotAtom = => - <>.into_iter().fold(Default::default(), Combine::combine); +AsUniTerm: UniTerm<'ast> = > => UniTerm::from(ut); + +// Macro repeating a rule producing some form of annotation (that can be +// repeated and combined, typically field metadata). +AnnotSeries: AnnotAtom = => { + <> + .into_iter() + .fold(Default::default(), |acc, next| CombineAlloc::combine(alloc, acc, next)); +}; // A single type or contract annotation. The `Type` rule forbids the use of // constructs that can themselves have annotation on the right, such as a `let`. -// Otherwise, `foo | let x = 1 in x : Num` is ambiguous (the annotation could be -// either `foo | (let x = 1 in (x : Num))` or `(foo | let x = 1 in x) : Num`). +// Otherwise, `foo | let x = 1 in x : Number` is ambiguous (the annotation could +// be either `foo | (let x = 1 in (x : Number))` or `(foo | let x = 1 in x) : +// Number`). // // The rule to use for type annotations is given as a parameter. We always use a // rule that is syntactically equivalent to the `Type` rule. The parameter is // here to control if the type should have its variables fixed now (`FixedType`) // or later (bare `Type`). Almost all rules are of the former kind, and use // `FixedType` (see `FixedType` and `parser::utils::fix_type_vars`). 
-AnnotAtom: TypeAnnotation = { - "|" => TypeAnnotation { - contracts: vec![LabeledType {typ: ty.clone(), label: mk_label(ty, src_id, l, r)}], +AnnotAtom: Annotation<'ast> = { + "|" => Annotation { + contracts: alloc.types(iter::once(ty)), ..Default::default() }, - ":" => TypeAnnotation { - typ: Some(LabeledType {typ: ty.clone(), label: mk_label(ty, src_id, l, r)}), + ":" => Annotation { + typ: ty, ..Default::default() }, }; // A single metadata annotation attached to a let-binding. Compared to // annotations which can appear everywhere (`AnnotAtom`, either a type or a -// contract annotation), let annotations also include documentation (`doc`). -LetAnnotAtom: LetMetadata = { +// contract annotation), let annotations also include documentation (`doc`). As +// opposed to record fields, they can't express priority, optionality, etc. +LetAnnotAtom: LetMetadata<'ast> = { > => <>.into(), "|" "doc" => LetMetadata { - doc: Some(s), + doc: Some(s.into()), ..Default::default() }, } -// A single field metadata annotation, without the pseudo-metadata (such as -// recursive priorities). -// -// The rule to use for type annotations is given as a parameter (cf AnnotAtom -// rule). -SimpleFieldAnnotAtom: FieldMetadata = { +// A single field metadata annotation. The rule to use for type annotations is +// given as a parameter (cf AnnotAtom rule). +FieldAnnotAtom: FieldMetadata<'ast> = { > => <>.into(), "|" "default" => FieldMetadata { priority: MergePriority::Bottom, @@ -152,16 +186,21 @@ SimpleFieldAnnotAtom: FieldMetadata = { }, } -// A single field metadata annotation. -// -// The rule to use for type annotations is given as a parameter (cf AnnotAtom -// rule). -FieldAnnotAtom: FieldExtAnnot = { - > => <>.into(), // Recursive priorities are disabled as of 1.2.0. Their semantics is non trivial // to adapt to RFC005 that landed in 1.0.0, so they are currently on hold. If we // drop them altogether, we'll have to clean the corresponding code floating // around (not only in the parser, but in the internals module, etc.) +// +// The current `FieldAnnot` was named `SimpleFieldAnnot` before commenting this +// part out. If we restore recursive priorities, we might probably revert to the +// old naming. +// +// // A single field metadata annotation. +// +// // The rule to use for type annotations is given as a parameter (cf AnnotAtom +// // rule). +//FieldAnnotAtom: FieldExtAnnot = { +// > => <>.into(), // "|" "rec" "force" => FieldExtAnnot { // rec_force: true, // ..Default::default() @@ -170,209 +209,207 @@ FieldAnnotAtom: FieldExtAnnot = { // rec_default: true, // ..Default::default() // }, -} +//} // An annotation, with possibly many annotations chained. -Annot: TypeAnnotation = AnnotSeries>>; +Annot: Annotation<'ast> = AnnotSeries>>; // A let annotation, with possibly many annotations chained. Include type // annotations, contract annotations and doc annotations. -LetAnnot: LetMetadata = AnnotSeries>>; - -// A simple field annotation, with possibly many annotations chained. A simple -// field annotation excludes pseudo metadata like recursive priorities operator. -SimpleFieldAnnot: FieldMetadata = AnnotSeries>; +LetAnnot: LetMetadata<'ast> = AnnotSeries>>; // A field annotation, with possibly many annotations chained. -FieldAnnot: FieldExtAnnot = +FieldAnnot: FieldMetadata<'ast> = AnnotSeries>>; -// A general term. Wrap the root of the grammar as a `RichTerm`. -pub Term: RichTerm = AsTerm; +// A general expression. Wrap the root of the grammar as an `Ast`. +pub Expr: Ast<'ast> = AsExpr; // A general type. 
Chosen such that it can't have top-level annotations. // (see `AnnotAtom`) -Type: Type = { +Type: Type<'ast> = { AsType, - Forall, + SpannedTy, }; // A type with type variables fixed. See `parser::utils::fix_type_vars`. // // This rule is public and can be used from external modules to parse an input // directly as a type. -pub FixedType: Type = { +pub FixedType: Type<'ast> = { =>? { - ty.fix_type_vars(mk_span(src_id, l, r))?; + ty.fix_type_vars(alloc, mk_span(src_id, l, r))?; Ok(ty) } }; // Either a term or a top-level let-binding (a let-binding without an `in`). // Used exclusively for the REPL. -pub ExtendedTerm: ExtendedTerm = { - "let" ?> "=" => { +pub ExtendedTerm: ExtendedTerm<'ast> = { + "let" ?> "=" => { if let Some(ann) = ann { - t = ann.annotation.attach_term(t); + exp = ann.annotation.attach_to_ast(alloc, exp); } - ExtendedTerm::ToplevelLet(id, t) + ExtendedTerm::ToplevelLet(id, exp) }, - Term => ExtendedTerm::RichTerm(<>), + Expr => ExtendedTerm::Expr(<>), }; -LetBinding: LetBinding = { - ?> "=" => { - LetBinding { pattern, annot, value } +LetBinding: LetBinding<'ast> = { + ?> "=" => { + LetBinding { pattern, metadata: metadata.unwrap_or_default(), value } } } // A general uniterm. The root of the grammar. -UniTerm: UniTerm = { +UniTerm: UniTerm<'ast> = { InfixExpr, AnnotatedInfixExpr, AsUniTerm, - "let" - ",")*> ","? - "in" =>? { - bindings.push(last); - Ok(UniTerm::from(mk_let(recursive.is_some(), bindings, body)?)) + "let" + + > + "in" =>? { + Ok(UniTerm::from(mk_let( + alloc, + recursive.is_some(), + bindings, + body, + )?)) }, - "fun" "=>" => { + "fun" "=>" => { let pos = mk_pos(src_id, l, r); - let rt = pats.into_iter().rev().fold(t, |t, assgn| RichTerm { - term: SharedTerm::new(mk_fun(assgn, t)), - pos, - }); - UniTerm::from(rt) - }, - "if" "then" "else" => - UniTerm::from(mk_app!(Term::Op1(UnaryOp::IfThenElse, cond), t1, t2)), - => { - UniTerm::from(err) + let expr = pats + .into_iter() + .rev() + .fold(body, |built, next_arg| + alloc.fun(next_arg, built).spanned(pos) + ); + + UniTerm::from(expr) }, + "if" "then" "else" => + UniTerm::from(alloc.if_then_else(cond, e1, e2)), + => UniTerm::from(err), "import" =>? { - Ok(UniTerm::from(mk_import_based_on_filename(s, mk_span(src_id, l, r))?)) + Ok(UniTerm::from(mk_import_based_on_filename(alloc, s, mk_span(src_id, l, r))?)) }, "import" "as" =>? { - Ok(UniTerm::from(mk_import_explicit(s, t, mk_span(src_id, l, r))?)) + Ok(UniTerm::from(mk_import_explicit(alloc, s, t, mk_span(src_id, l, r))?)) }, "import" => { UniTerm::from(Term::Import(Import::Package { id: pkg.ident() })) } }; -AnnotatedInfixExpr: UniTerm = { - > > => { - UniTerm::from(ann.attach_term(t)) +AnnotatedInfixExpr: UniTerm<'ast> = { + > > => { + UniTerm::from(ann.attach_to_ast(alloc, e)) }, }; -Forall: Type = - "forall" "." > => { +Forall: TypeUnr<'ast> = + "forall" "." => { ids.into_iter().rev().fold( ty, // The variable kind will be determined during the `fix_type_vars` - // phase. For now, we put a random one (which is also the default - // one, for unused type variables) + // phase. For now, we put an arbitrary one (which is also the + // default one for unused type variables) |acc, var| { let pos = acc.pos; + Type { typ: TypeF::Forall { var, var_kind: VarKind::Type, - body: Box::new(acc) + body: alloc.type_move(acc), }, pos } } - ) + ).typ }; -// A n-ary application-like expression (n may be 0, in the sense that this rule -// also includes previous levels). -Applicative: UniTerm = { +// The possible heads of function application. 
The head of a multi-argument +// application is the leftmost part in ` ... `. +ApplicativeHead: UniTerm<'ast> = { Atom, AsUniTerm>, - > > => { - // We special case the application of an enum tag here. In principle, an - // enum variant applied to an argument is of different nature than a - // function application. However, for convenience, we made the syntax - // the same. So we now have to detect cases like `'Foo {x=1}` and - // convert that to a proper enum variant. - let term = if let Term::Enum(tag) = t1.as_ref() { - Term::EnumVariant { - tag: *tag, - arg: t2, - attrs: EnumVariantAttrs::default(), - } - } - else { - Term::App(t1, t2) + > => UniTerm::from(primop_app!(alloc, op, t)), + > > + => UniTerm::from(primop_app!(alloc, op, t1, t2)), + NOpPre>, + "match" "{" > "}" => UniTerm::from(alloc.match_expr(branches)), +}; + +// A n-ary application-like expression (n may be 0, in the sense that this rule +// also includes previous levels). +Applicative: UniTerm<'ast> = { + > *> => { + let node = match head { + // A zero-ary application is just the head. + _ if args.is_empty() => head.node, + // We special case the application of an enum tag here. In principle, an + // enum variant applied to an argument is of different nature than a + // function application. However, for convenience, we made the syntax + // the same. So we now have to detect cases like `'Foo {x=1}` and + // convert that to a proper enum variant. + Node::EnumVariant { tag, arg: None } if args.len() == 1 => + alloc.enum_variant(*tag, args.pop()), + _ => alloc.app(head, args), }; - UniTerm::from(term) + UniTerm::from(node) }, - > => UniTerm::from(mk_term::op1(op, t)), - > > - => UniTerm::from(mk_term::op2(op, t1, t2)), - NOpPre>, - "match" "{" "}" => { - let branches = branches - .into_iter() - .map(|(branch, _comma)| branch) - .chain(last) - .collect(); - - UniTerm::from(Term::Match(MatchData { branches })) - } }; // The parametrized array type. -TypeArray: Type = "Array" > => +TypeArray: TypeUnr<'ast> = "Array" > => // For some reason, we have to bind the type into a `t` // rather than using the usual `<>` placeholder, otherwise, // it doesn't compile. - Type::from(TypeF::Array(Box::new(t))); + TypeF::Array(alloc.type_move(t)); // A record operation chain, such as `{foo = data}.bar.baz`. -RecordOperationChain: RichTerm = { - > "." => mk_term::op1(UnaryOp::RecordAccess(id), t).with_pos(id.pos), - > "." > => mk_access(t_id, t), +RecordOperationChain: Node<'ast> = { + > "." => + alloc.prim_op(PrimOp::RecordStatAccess(id), iter::once(e)), + > "." > => mk_access(alloc, t_id, e), }; -RecordRowTail: RecordRows = { +RecordRowTail: RecordRows<'ast> = { => RecordRows(RecordRowsF::TailVar(<>)), "Dyn" => RecordRows(RecordRowsF::TailDyn), }; // A record, that can be later interpreted either as a record literal or as a // record type. 
-UniRecord: UniRecord = { +UniRecord: UniRecord<'ast> = { "{" ",")*> "}" => { - let (last_field, attrs) = match last { + let (last_field, open) = match last { Some(RecordLastField::Field(f)) => (Some(f), Default::default()), - Some(RecordLastField::Ellipsis) => - (None, RecordAttrs { open: true, ..Default::default() }), + Some(RecordLastField::Ellipsis) => (None, true), None => (None, Default::default()) }; - let pos_ellipsis = if attrs.open { - mk_pos(src_id, last_l, last_r) - } - else { - TermPos::None - }; + let pos_ellipsis = if open { + mk_pos(src_id, last_l, last_r) + } + else { + TermPos::None + }; let fields : Vec<_> = fields.into_iter().chain(last_field.into_iter()).collect(); + UniRecord { fields, tail: tail.map(|t| (t.1, mk_pos(src_id, tail_l, tail_r))), - attrs, + open, pos: TermPos::None, pos_ellipsis, } @@ -386,61 +423,38 @@ NumberLiteral: Number = { <"bin num literal">, }; -Atom: UniTerm = { +Atom: UniTerm<'ast> = { "(" > ")", "(" ")", - NumberLiteral => UniTerm::from(Term::Num(<>)), - "null" => UniTerm::from(Term::Null), - Bool => UniTerm::from(Term::Bool(<>)), - AsUniTerm, + NumberLiteral => UniTerm::from(alloc.number(<>)), + "null" => UniTerm::from(Node::Null), + Bool => UniTerm::from(Node::Bool(<>)), + AsUniTerm, Ident => UniTerm::from(UniTermNode::Var(<>)), WithPos => UniTerm::from(UniTermNode::Record(<>)), - => UniTerm::from(Term::Enum(<>)), - "[" ",")*> "]" => { - let terms = terms - .into_iter() - .chain(last.into_iter()) - .collect(); - - UniTerm::from(Term::Array(terms, Default::default())) - }, - AsUniTerm>, - AsUniTerm, + EnumTag => UniTerm::from(Node::EnumVariant { tag: <>, arg: None }), + "[" > "]" => UniTerm::from(alloc.array(<>)), + AsUniTerm>, + AsUniTerm>, }; // A record field definition. The is the only place where we don't fix the type // variables inside the annotation right away (note the `Annot` instead // of `Annot`). -RecordField: FieldDef = { - ?> Field { value, ..Default::default() } - }; - +RecordField: FieldDef<'ast> = { + + + ?> + RecordLastField::Ellipsis, }; // A field path syntax in a field definition, as in `{foo."bar bar".baz = "value"}`. -FieldPath: FieldPath = { +FieldPath: FieldPath<'ast> = { ".")*> => { elems.push(last); elems @@ -478,17 +494,17 @@ FieldPath: FieldPath = { // A field path which only contains static string literals, that is, without any // interpolated expression in it. -pub StaticFieldPath: Vec = =>? { +pub StaticFieldPath: Vec = =>? { field_path .into_iter() .map(|elem| match elem { FieldPathElem::Ident(ident) => Ok(ident), FieldPathElem::Expr(expr) => { - let as_string = expr.as_ref().try_str_chunk_as_static_str().ok_or( + let as_string = expr.node.try_str_chunk_as_static_str().ok_or( ParseError::InterpolationInStaticPath { path_elem_span: expr.pos .into_opt() - .unwrap_or_else(|| mk_span(src_id, start, end)), + .unwrap_or_else(|| mk_span(src_id, l, r)), }, )?; Ok(LocIdent::new_with_pos(as_string, expr.pos)) @@ -501,32 +517,31 @@ pub StaticFieldPath: Vec = rule produces a -// RichTerm anyway, so it's simpler to just return it instead of artificially -// deconstructing it. +// We could just return a `Node` instead of a `Ast`, as position information is +// already stored in the span. But the rule produces an Ast anyway, so +// it's simpler to just return it instead of artificially deconstructing it. // // This rule is currently only used for the CLI and isn't part of the grammar // for normal Nickel source code. 
-pub CliFieldAssignment: (Vec, RichTerm, RawSpan) = - "=" > +pub CliFieldAssignment: (Vec, Ast<'ast>, RawSpan) = + "=" => (path, value, mk_span(src_id, start, end)); -FieldPathElem: FieldPathElem = { +FieldPathElem: FieldPathElem<'ast> = { => FieldPathElem::Ident(<>), - > => FieldPathElem::Expr(<>), + > => FieldPathElem::Expr(<>), }; // A pattern. @@ -557,7 +572,7 @@ FieldPathElem: FieldPathElem = { // always interpreted as `fun ('Foo) ('Bar) => ...`. The other interpretation // can be written as `fun ('Foo 'Bar) => ...`. // -// We allow parenthesized enum variants pattern in general pattern as well, not +// We allow parenthesized enum variant patterns in general patterns as well, not // only for consistency, but because they also make nested enum variant patterns // more readable: `'Foo ('Bar 5)` vs `'Foo 'Bar 5`. In fact, we also force // nested enum patterns to be parenthesized, and forbid the latter, for better @@ -615,9 +630,9 @@ FieldPathElem: FieldPathElem = { // we can ensure there's only one way to parse each and every combination with // only one look-ahead, thus satisfying the LR(1). #[inline] -PatternF: Pattern = { +PatternF: Pattern<'ast> = { - > "@")?> + "@")?> > => { Pattern { @@ -629,24 +644,24 @@ PatternF: Pattern = { }; #[inline] -PatternDataF: PatternData = { - RecordPattern => PatternData::Record(<>), - ArrayPattern => PatternData::Array(<>), - ConstantPattern => PatternData::Constant(<>), - EnumRule => PatternData::Enum(<>), - OrRule => PatternData::Or(<>), +PatternDataF: PatternData<'ast> = { + RecordPattern => PatternData::Record(alloc.record_pattern(<>)), + ArrayPattern => PatternData::Array(alloc.array_pattern(<>)), + ConstantPattern => PatternData::Constant(alloc.constant_pattern(<>)), + EnumRule => PatternData::Enum(alloc.enum_pattern(<>)), + OrRule => PatternData::Or(alloc.or_pattern(<>)), IdentRule => PatternData::Any(<>), "_" => PatternData::Wildcard, }; // A general pattern, unrestricted. #[inline] -Pattern: Pattern = PatternF; +Pattern: Pattern<'ast> = PatternF; // A pattern restricted to function arguments, which requires or-patterns and // enum variant patterns to be parenthesized at the top-level. #[inline] -PatternFun: Pattern = PatternF; +PatternFun: Pattern<'ast> = PatternF; // A pattern that can be used within a branch of an or-pattern. To avoid a // shift-reduce conflicts (because we want to allow `or` to remain a valid @@ -663,7 +678,7 @@ PatternFun: Pattern = PatternF; // // See the `PatternF` rule for an explanation of why we need those restrictions. #[inline] -PatternOrBranch: Pattern = +PatternOrBranch: Pattern<'ast> = > => { @@ -674,24 +689,24 @@ PatternOrBranch: Pattern = } }; -ConstantPattern: ConstantPattern = { +ConstantPattern: ConstantPattern<'ast> = { => ConstantPattern { data, pos: mk_pos(src_id, start, end) } }; -ConstantPatternData: ConstantPatternData = { +ConstantPatternData: ConstantPatternData<'ast> = { Bool => ConstantPatternData::Bool(<>), - NumberLiteral => ConstantPatternData::Number(<>), + NumberLiteral => ConstantPatternData::Number(alloc.number_move(<>)), // We could accept multiline strings here, but it's unlikely that this will // result in very readable match expressions. 
For now we restrict ourselves // to standard string; we can always extend to multiline later if needed - StandardStaticString => ConstantPatternData::String(<>.into()), + StandardStaticString => ConstantPatternData::String(alloc.string_move(&<>)), "null" => ConstantPatternData::Null, }; -RecordPattern: RecordPattern = { +RecordPattern: RecordPattern<'ast> = { "{" ",")*> "}" =>? { let tail = match last { Some(LastPattern::Normal(m)) => { @@ -708,17 +723,17 @@ RecordPattern: RecordPattern = { }; let pattern = RecordPattern { - patterns: field_pats, + patterns: alloc.field_patterns(field_pats), tail, pos: mk_pos(src_id, start, end) }; - pattern.check_dup()?; + pattern.check_dup()?; Ok(pattern) }, }; -ArrayPattern: ArrayPattern = { +ArrayPattern: ArrayPattern<'ast> = { "[" ",")*> "]" => { let tail = match last { Some(LastPattern::Normal(m)) => { @@ -734,18 +749,16 @@ ArrayPattern: ArrayPattern = { None => TailPattern::Empty, }; - let pattern = ArrayPattern{ - patterns, + ArrayPattern { + patterns: alloc.patterns(patterns), tail, pos: mk_pos(src_id, start, end) - }; - - pattern + } }, }; // A pattern for an enum tag (without argument). -EnumTagPattern: EnumPattern = => EnumPattern { +EnumTagPattern: EnumPattern<'ast> = => EnumPattern { tag, pattern: None, pos: mk_pos(src_id, start, end), @@ -753,33 +766,33 @@ EnumTagPattern: EnumPattern = => EnumPatter // A rule which only matches an enum variant pattern of the form `' or`. // Used to disambiguate between an enum variant pattern and an or-pattern. -EnumVariantOrPattern: EnumPattern = +EnumVariantOrPattern: EnumPattern<'ast> = - > + > => { let pos_or = or_arg.pos; EnumPattern { tag, - pattern: Some(Box::new(Pattern { + pattern: Some(Pattern { data: PatternData::Any(or_arg), alias: None, pos: pos_or, - })), + }), pos: mk_pos(src_id, start, end), } }; // An enum variant pattern, excluding the `EnumVariantPatternOr` case: that is, // this rule doesn't match the case `' or`. -EnumVariantNoOrPattern: EnumPattern = +EnumVariantNoOrPattern: EnumPattern<'ast> = - >> + >> => EnumPattern { tag, - pattern: Some(Box::new(pattern)), + pattern: Some(pattern), pos: mk_pos(src_id, start, end), }; @@ -791,7 +804,7 @@ EnumVariantNoOrPattern: EnumPattern = // or-pattern, as in `'Foo or 'Bar`; but as long as we parse this common // prefix using the same rule and only disambiguate later, there is no // shift/reduce conflict. -EnumVariantPattern: EnumPattern = { +EnumVariantPattern: EnumPattern<'ast> = { EnumVariantOrPattern, EnumVariantNoOrPattern, }; @@ -800,7 +813,7 @@ EnumVariantPattern: EnumPattern = { // or-pattern. As we parse `EnumVariantOrPattern` and treat it specifically in // an `or` branch (`OrPatternBranch`), we need to remove it from the enum // pattern rule. -EnumPatternOrBranch: EnumPattern = { +EnumPatternOrBranch: EnumPattern<'ast> = { EnumVariantNoOrPattern, // Only a top-level un-parenthesized enum variant pattern can be ambiguous. // If it's parenthesized, we allow the general version including the "or" @@ -811,7 +824,7 @@ EnumPatternOrBranch: EnumPattern = { // An unparenthesized enum pattern (including both enum tags and enum // variants). -EnumPatternUnparens: EnumPattern = { +EnumPatternUnparens: EnumPattern<'ast> = { EnumTagPattern, EnumVariantPattern, }; @@ -819,14 +832,14 @@ EnumPatternUnparens: EnumPattern = { // A parenthesized enum pattern, including both tags and variants (note that an // enum tag alone is never parenthesized: parentheses only applies to enum // variant patterns). 
-EnumPatternParens: EnumPattern = { +EnumPatternParens: EnumPattern<'ast> = { EnumTagPattern, "(" ")", } // The unrestricted rule for enum patterns. Allows both enum tags and enum // variants, and both parenthesized and un-parenthesized enum variants. -EnumPattern: EnumPattern = { +EnumPattern: EnumPattern<'ast> = { EnumTagPattern, EnumVariantPattern, "(" ")" @@ -835,21 +848,21 @@ EnumPattern: EnumPattern = { // An individual element of an or-pattern, plus a trailing "or". This rule is a // bit artificial, and is essentially here to dispel the shift/reduce conflict // around `'Foo or`/`'Foo or 'Bar` explained in the description of `PatternF`. -OrPatternBranch: Pattern = { +OrPatternBranch: Pattern<'ast> = { // To avoid various shift-reduce conflicts, the patterns used within an // `or`-branch have several restrictions. See the `PatternOrBranch` rule. "or", // A variant pattern of the form `' or`. The trick is to instead // consider it as the enum tag pattern `'` followed by the `or` // contextual keyword after-the-fact. - => { - let pos = pat.pos; + EnumVariantOrPattern => { + let pos = <>.pos; Pattern { pos, alias: None, data: PatternData::Enum(EnumPattern { - tag: pat.tag, + tag: <>.tag, pattern: None, pos, }), @@ -858,7 +871,7 @@ OrPatternBranch: Pattern = { }; // Unparenthesized or-pattern. -OrPatternUnparens: OrPattern = { +OrPatternUnparens: OrPattern<'ast> = { > @@ -867,25 +880,25 @@ OrPatternUnparens: OrPattern = { patterns.into_iter().chain(std::iter::once(last)).collect(); OrPattern { - patterns, + patterns: alloc.patterns(patterns), pos: mk_pos(src_id, start, end), } }, }; // Parenthesized or-pattern. -OrPatternParens: OrPattern = { +OrPatternParens: OrPattern<'ast> = { "(" ")", }; // Unrestricted or-pattern, which can be parenthesized or not. -OrPattern: OrPattern = { +OrPattern: OrPattern<'ast> = { OrPatternUnparens, OrPatternParens, } // A binding `ident = ` inside a record pattern. -FieldPattern: FieldPattern = { +FieldPattern: FieldPattern<'ast> = { ?> "=" => FieldPattern { matched_id, @@ -909,29 +922,29 @@ FieldPattern: FieldPattern = { }; // Last field pattern of a record pattern -LastFieldPat: LastPattern = { - FieldPattern => LastPattern::Normal(Box::new(<>)), +LastFieldPat: LastPattern<'ast, FieldPattern<'ast>> = { + FieldPattern => LastPattern::Normal(alloc.field_pattern(<>)), ".." => LastPattern::Ellipsis(<>), }; // Last pattern of an array pattern -LastElemPat: LastPattern = { - Pattern => LastPattern::Normal(Box::new(<>)), +LastElemPat: LastPattern<'ast, Pattern<'ast>> = { + Pattern => LastPattern::Normal(alloc.pattern(<>)), ".." => LastPattern::Ellipsis(<>), } // A default annotation in a pattern. -DefaultAnnot: RichTerm = "?" => t; +DefaultAnnot: Ast<'ast> = "?" ; // A metadata keyword returned as an indent. In some positions, those are // considered valid identifiers. See ExtendedIdent below. -MetadataKeyword: LocIdent = { - "doc" => LocIdent::new("doc"), - "default" => LocIdent::new("default"), - "force" => LocIdent::new("force"), - "priority" => LocIdent::new("priority"), - "optional" => LocIdent::new("optional"), - "not_exported" => LocIdent::new("not_exported"), +MetadataKeyword: Ident = { + "doc" => Ident::new("doc"), + "default" => Ident::new("default"), + "force" => Ident::new("force"), + "priority" => Ident::new("priority"), + "optional" => Ident::new("optional"), + "not_exported" => Ident::new("not_exported"), }; // We allow metadata keywords (optional, default, doc, etc.) 
as field names @@ -943,25 +956,24 @@ MetadataKeyword: LocIdent = { // // Thus, for fields, ExtendedIdent is use in place of Ident. ExtendedIdent: LocIdent = { - WithPos, + SpannedId, Ident, }; // The "or" contextual keyword, parsed as an indent. -IdentOr: LocIdent = "or" => LocIdent::new("or"); +IdentOr: Ident = "or" => Ident::new("or"); // The "as" contextual keyword, parsed as an indent. -IdentAs: LocIdent = "as" => LocIdent::new("as"); - +IdentAs: Ident = "as" => Ident::new("as"); // The set of pure identifiers, which are never keywords in any context. -RestrictedIdent: LocIdent = "identifier" => LocIdent::new(<>); +RestrictedIdent: Ident = "identifier" => Ident::new(<>); // Identifiers allowed everywhere, which includes pure identifiers and contextual // keywords. #[inline] Ident: LocIdent = { - WithPos, - WithPos, - WithPos, + SpannedId, + SpannedId, + SpannedId, }; Bool: bool = { @@ -970,20 +982,23 @@ Bool: bool = { }; // String-like syntax which supports interpolation. -// Depending on the opening brace, these either parse as strings, or as "symbolic strings", -// which get desugared here to an array of terms. -StrChunks: RichTerm = { +// +// Depending on the opening brace, these either parse as strings, or as +// "symbolic strings", which get desugared here to an array of terms. +StringChunks: Node<'ast> = { + // The lexer emits a stream of groups of `ChunkExpr` interspersed by one + // `ChunkLiteral`: consecutive chunks literals are fused by the lexer. => { debug_assert!( start.is_closed_by(&end), "Fatal parser error: a string starting with {start:?} should never be closed by {end:?}" ); - let chunks: Vec> = fst.into_iter() - .map(StrChunk::Literal) + let chunks: Vec>> = fst.into_iter() + .map(StringChunk::Literal) .chain(chunks.into_iter() .map(|(mut es, s)| { - es.push(StrChunk::Literal(s)); + es.push(StringChunk::Literal(s)); es }) .flatten()) @@ -996,30 +1011,33 @@ StrChunks: RichTerm = { chunks }; + // In the case of symbolic strings, we don't produce a string (in + // practice string chunks). The chunks are reified to an Nickel array + // and wrapped in a record instead. if let StringStartDelimiter::Symbolic(prefix) = start { let terms = chunks.into_iter().map(|chunk| match chunk { - StrChunk::Literal(_) => Term::StrChunks(vec![chunk]).into(), - StrChunk::Expr(e, _) => e, + StringChunk::Literal(_) => alloc.string_chunks(iter::once(chunk)), + StringChunk::Expr(e, _) => e, }).collect(); - RichTerm::from(build_record([ + build_record([ ( FieldPathElem::Ident("tag".into()), - Field::from(RichTerm::from(Term::Enum("SymbolicString".into()))) + Field::from(Ast::from(builder::enum_tag("SymbolicString"))) ), ( FieldPathElem::Ident("prefix".into()), - Field::from(RichTerm::from(Term::Enum(prefix.into()))) + Field::from(Ast::from(builder::enum_tag(prefix))) ), ( FieldPathElem::Ident("fragments".into()), - Field::from(RichTerm::from(Term::Array(terms, Default::default()))) + Field::from(Ast::from(alloc.array(terms))) ) - ], Default::default())) + ], Default::default()) } else { let mut chunks = chunks; chunks.reverse(); - RichTerm::from(Term::StrChunks(chunks)) + alloc.string_chunks(chunks) } }, }; @@ -1047,36 +1065,43 @@ ChunkLiteral : String = }) }; -ChunkExpr: StrChunk = Interpolation > "}" => StrChunk::Expr(t, 0); +// An interpolated expression in a string: `%{}`. +ChunkExpr: StringChunk> = Interpolation "}" => StringChunk::Expr(<>, 0); +// The opening sequence of string interpolation. 
Interpolation = { "%{", "multstr %{" }; // A construct which looks like a string, but is generic over its delimiters. // Used to implement `StaticString` as well as `StringEnumTag`. DelimitedStaticString: String = Start End => s.unwrap_or_default(); +// A static string using the basic string syntax (delimited by double quotes). StandardStaticString = DelimitedStaticString<"\"", "\"">; +// A static string using the multiline string syntax. MultilineStaticString: String = DelimitedStaticString<"m%\"","\"%"> => { // strip the common indentation prefix - let chunks: Vec> = vec![StrChunk::Literal(<>)]; + let chunks: Vec>> = vec![StringChunk::Literal(<>)]; match strip_indent(chunks).pop().unwrap() { - StrChunk::Literal(s) => s, + StringChunk::Literal(s) => s, // We build _ => unreachable!(), } }; -StaticString : String = { +// A string which must be known statically without having to run the program. In +// practice, it's a string where interpolation isn't allowed. +StaticString: String = { StandardStaticString, MultilineStaticString, } +// A quoted enum tag, which can contain spaces or other special characters. StringEnumTag = DelimitedStaticString<"'\"", "\"">; EnumTag: LocIdent = { "raw enum tag" => <>.into(), - => <>.into(), + StringEnumTag => <>.into(), }; ChunkLiteralPart: ChunkLiteralPart = { @@ -1085,46 +1110,46 @@ ChunkLiteralPart: ChunkLiteralPart = { "str esc char" => ChunkLiteralPart::Char(<>), }; -UOp: UnaryOp = { - "typeof" => UnaryOp::Typeof, - "blame" => UnaryOp::Blame, - "label/flip_polarity" => UnaryOp::LabelFlipPol, - "label/polarity" => UnaryOp::LabelPol, - "label/go_dom" => UnaryOp::LabelGoDom, - "label/go_codom" => UnaryOp::LabelGoCodom, - "label/go_array" => UnaryOp::LabelGoArray, - "label/go_dict" => UnaryOp::LabelGoDict, - "enum/embed" => UnaryOp::EnumEmbed(<>), - "array/map" => UnaryOp::ArrayMap, - "array/generate" => UnaryOp::ArrayGen, - "record/map" => UnaryOp::RecordMap, - "seq" => UnaryOp::Seq, - "deep_seq" => UnaryOp::DeepSeq, - "op force" => UnaryOp::Force{ ignore_not_exported: false }, - "array/length" => UnaryOp::ArrayLength, - "record/fields" => UnaryOp::RecordFields(RecordOpKind::IgnoreEmptyOpt), - "record/fields_with_opts" => UnaryOp::RecordFields(RecordOpKind::ConsiderAllFields), - "record/values" => UnaryOp::RecordValues, - "string/trim" => UnaryOp::StringTrim, - "string/chars" => UnaryOp::StringChars, - "string/uppercase" => UnaryOp::StringUppercase, - "string/lowercase" => UnaryOp::StringLowercase, - "string/length" => UnaryOp::StringLength, - "to_string" => UnaryOp::ToString, - "number/from_string" => UnaryOp::NumberFromString, - "enum/from_string" => UnaryOp::EnumFromString, - "string/is_match" => UnaryOp::StringIsMatch, - "string/find" => UnaryOp::StringFind, - "string/find_all" => UnaryOp::StringFindAll, - "op rec_force" => UnaryOp::RecForce, - "op rec_default" => UnaryOp::RecDefault, - "record/empty_with_tail" => UnaryOp::RecordEmptyWithTail, - "trace" => UnaryOp::Trace, - "label/push_diag" => UnaryOp::LabelPushDiag, +UOp: PrimOp = { + "typeof" => PrimOp::Typeof, + "blame" => PrimOp::Blame, + "label/flip_polarity" => PrimOp::LabelFlipPol, + "label/polarity" => PrimOp::LabelPol, + "label/go_dom" => PrimOp::LabelGoDom, + "label/go_codom" => PrimOp::LabelGoCodom, + "label/go_array" => PrimOp::LabelGoArray, + "label/go_dict" => PrimOp::LabelGoDict, + "enum/embed" => PrimOp::EnumEmbed(<>), + "array/map" => PrimOp::ArrayMap, + "array/generate" => PrimOp::ArrayGen, + "record/map" => PrimOp::RecordMap, + "seq" => PrimOp::Seq, + "deep_seq" => PrimOp::DeepSeq, 
+ "op force" => PrimOp::Force{ ignore_not_exported: false }, + "array/length" => PrimOp::ArrayLength, + "record/fields" => PrimOp::RecordFields(RecordOpKind::IgnoreEmptyOpt), + "record/fields_with_opts" => PrimOp::RecordFields(RecordOpKind::ConsiderAllFields), + "record/values" => PrimOp::RecordValues, + "string/trim" => PrimOp::StringTrim, + "string/chars" => PrimOp::StringChars, + "string/uppercase" => PrimOp::StringUppercase, + "string/lowercase" => PrimOp::StringLowercase, + "string/length" => PrimOp::StringLength, + "to_string" => PrimOp::ToString, + "number/from_string" => PrimOp::NumberFromString, + "enum/from_string" => PrimOp::EnumFromString, + "string/is_match" => PrimOp::StringIsMatch, + "string/find" => PrimOp::StringFind, + "string/find_all" => PrimOp::StringFindAll, + "op rec_force" => PrimOp::RecForce, + "op rec_default" => PrimOp::RecDefault, + "record/empty_with_tail" => PrimOp::RecordEmptyWithTail, + "trace" => PrimOp::Trace, + "label/push_diag" => PrimOp::LabelPushDiag, "eval_nix" =>? { #[cfg(feature = "nix-experimental")] { - Ok(UnaryOp::EvalNix) + Ok(PrimOp::EvalNix) } #[cfg(not(feature = "nix-experimental"))] { @@ -1136,68 +1161,68 @@ UOp: UnaryOp = { }) } }, - "enum/get_arg" => UnaryOp::EnumGetArg, - "enum/make_variant" => UnaryOp::EnumMakeVariant, - "enum/is_variant" => UnaryOp::EnumIsVariant, - "enum/get_tag" => UnaryOp::EnumGetTag, - "contract/custom" => UnaryOp::ContractCustom, - "number/arccos" => UnaryOp::NumberArcCos, - "number/arcsin" => UnaryOp::NumberArcSin, - "number/arctan" => UnaryOp::NumberArcTan, - "number/cos" => UnaryOp::NumberCos, - "number/sin" => UnaryOp::NumberSin, - "number/tan" => UnaryOp::NumberTan, + "enum/get_arg" => PrimOp::EnumGetArg, + "enum/make_variant" => PrimOp::EnumMakeVariant, + "enum/is_variant" => PrimOp::EnumIsVariant, + "enum/get_tag" => PrimOp::EnumGetTag, + "contract/custom" => PrimOp::ContractCustom, + "number/arccos" => PrimOp::NumberArcCos, + "number/arcsin" => PrimOp::NumberArcSin, + "number/arctan" => PrimOp::NumberArcTan, + "number/cos" => PrimOp::NumberCos, + "number/sin" => PrimOp::NumberSin, + "number/tan" => PrimOp::NumberTan, } -PatternGuard: RichTerm = "if" => <>; +PatternGuard: Ast<'ast> = "if" => <>; -MatchBranch: MatchBranch = - "=>" => +MatchBranch: MatchBranch<'ast> = + "=>" => MatchBranch { pattern, guard, body}; // Infix operators by precedence levels. Lowest levels take precedence over // highest ones. -InfixBOp2: BinaryOp = { - "++" => BinaryOp::StringConcat, - "@" => BinaryOp::ArrayConcat, +InfixBOp2: PrimOp = { + "++" => PrimOp::StringConcat, + "@" => PrimOp::ArrayConcat, } -InfixBOp3: BinaryOp = { - "*" => BinaryOp::Mult, - "/" => BinaryOp::Div, - "%" => BinaryOp::Modulo, +InfixBOp3: PrimOp = { + "*" => PrimOp::Mult, + "/" => PrimOp::Div, + "%" => PrimOp::Modulo, } -InfixBOp4: BinaryOp = { - "+" => BinaryOp::Plus, - "-" => BinaryOp::Sub, +InfixBOp4: PrimOp = { + "+" => PrimOp::Plus, + "-" => PrimOp::Sub, } -InfixUOp5: UnaryOp = { - "!" => UnaryOp::BoolNot, +InfixUOp5: PrimOp = { + "!" 
=> PrimOp::BoolNot, } -InfixBOp7: BinaryOp = { - "<" => BinaryOp::LessThan, - "<=" => BinaryOp::LessOrEq, - ">" => BinaryOp::GreaterThan, - ">=" => BinaryOp::GreaterOrEq, +InfixBOp7: PrimOp = { + "<" => PrimOp::LessThan, + "<=" => PrimOp::LessOrEq, + ">" => PrimOp::GreaterThan, + ">=" => PrimOp::GreaterOrEq, } -InfixBOp8: BinaryOp = { - "==" => BinaryOp::Eq, +InfixBOp8: PrimOp = { + "==" => PrimOp::Eq, } -InfixLazyBOp9: UnaryOp = { - "&&" => UnaryOp::BoolAnd, +InfixLazyBOp9: PrimOp = { + "&&" => PrimOp::BoolAnd, } -InfixLazyBOp10: UnaryOp = { - "||" => UnaryOp::BoolOr, +InfixLazyBOp10: PrimOp = { + "||" => PrimOp::BoolOr, } -InfixBOp: BinaryOp = { +InfixBOp: PrimOp = { InfixBOp2, InfixBOp3, InfixBOp4, @@ -1205,7 +1230,7 @@ InfixBOp: BinaryOp = { InfixBOp8, } -InfixUOpOrLazyBOp: UnaryOp = { +InfixUOpOrLazyBOp: PrimOp = { InfixUOp5, InfixLazyBOp9, InfixLazyBOp10, @@ -1216,64 +1241,86 @@ InfixOp: InfixOp = { => <>.into(), } -CurriedOp: RichTerm = { - => - op.eta_expand(mk_pos(src_id, l, r)), - "&" => - InfixOp::from(BinaryOp::Merge(mk_merge_label(src_id, l, r))) - .eta_expand(mk_pos(src_id, l, r)), - "|>" => - mk_fun!("x1", "x2", - mk_app!(mk_term::var("x2"), mk_term::var("x1")) - .with_pos(mk_pos(src_id, l, r)) - ), - "!=" => - mk_fun!("x1", "x2", - mk_term::op1( - UnaryOp::BoolNot, - Term::Op2(BinaryOp::Eq, - mk_term::var("x1"), - mk_term::var("x2") - ) +//TODO[RFC007]: restore proper operation positions +CurriedOp: Node<'ast> = { + InfixOp => <>.eta_expand(alloc), + "&" => InfixOp::from(PrimOp::Merge(MergeKind::Standard)).eta_expand(alloc), + "|>" => { + let fst_arg = LocIdent::fresh(); + let snd_arg = LocIdent::fresh(); + + fun!( + alloc, + fst_arg, + snd_arg, + app!( + alloc, + builder::var(snd_arg), + builder::var(fst_arg), + ), + ) + }, + "!=" => { + let fst_arg = LocIdent::fresh(); + let snd_arg = LocIdent::fresh(); + + fun!( + alloc, + fst_arg, + snd_arg, + primop_app!( + alloc, + PrimOp::BoolNot, + primop_app!( + alloc, + PrimOp::Eq, + builder::var(fst_arg), + builder::var(snd_arg), + ), + ), + ) + }, + //`foo.bar` is a static record access, but when used in a curried form, it's + //a dynamic record access (that is, `(.) foo bar` is `foo."%{bar}"`). It + //turns out a dynamic record access takes the record as the last argument, + //in the style of the stdlib. If we want `(.) foo bar` to be `foo."%{bar}"`, + //we thus have to flip the arguments. + "." => { + let fst_arg = LocIdent::fresh(); + let snd_arg = LocIdent::fresh(); + + fun!( + alloc, + fst_arg, + snd_arg, + primop_app!( + alloc, + PrimOp::RecordGet, + builder::var(snd_arg), + builder::var(fst_arg), ) - .with_pos(mk_pos(src_id, l, r)) - ), - //`foo.bar` is a static - // record access, but when used in a curried form, it's a dynamic record - // access (that is, `(.) foo bar` is `foo."%{bar}"`). It turns out a dynamic - // record access takes the record as the last argument, in the style of the - // stdlib. If we want `(.) foo bar` to be `foo."%{bar}"`, we thus have to - // flip the arguments. - "." 
=> - mk_fun!( - "x1", - "x2", - mk_term::op2( - BinaryOp::RecordGet, - mk_term::var("x2"), - mk_term::var("x1"), - ).with_pos(mk_pos(src_id, l, r)) - ), + ) + }, } -InfixUOpApp: UniTerm = - > => UniTerm::from(mk_term::op1(op, t)); +InfixUOpApp: UniTerm<'ast> = + > => UniTerm::from(alloc.primop(op, e)); -InfixBOpApp: UniTerm = - > > => - UniTerm::from(mk_term::op2(op, t1, t2)); +InfixBOpApp: UniTerm<'ast> = + > > => + UniTerm::from(primop_app!(alloc, op, e1, e2)); -InfixLazyBOpApp: UniTerm = - > > => - UniTerm::from(mk_app!(mk_term::op1(op, t1), t2)); +InfixLazyBOpApp: UniTerm<'ast> = + > > => + UniTerm::from(app!(alloc, primop_app!(alloc, op, e1), e2)); -InfixExpr: UniTerm = { +InfixExpr: UniTerm<'ast> = { #[precedence(level="0")] Applicative, #[precedence(level="1")] - "-" > => - UniTerm::from(mk_term::op2(BinaryOp::Sub, Term::Num(Number::ZERO), <>)), + "-" > => + UniTerm::from(primop_app!(alloc, PrimOp::Sub, alloc.number(Number::ZERO), <>)), #[precedence(level="2")] #[assoc(side="left")] InfixBOpApp, @@ -1288,20 +1335,24 @@ InfixExpr: UniTerm = { InfixUOpApp, #[precedence(level="6")] #[assoc(side="left")] - > "&" > => - UniTerm::from(mk_term::op2(BinaryOp::Merge(mk_merge_label(src_id, l, r)), t1, t2)), + > "&" > => + UniTerm::from(primop_app!(alloc, PrimOp::Merge(MergeKind::Standard), t1, t2)), - > "|>" > => - UniTerm::from(mk_app!(t2, t1)), + > "|>" > => + UniTerm::from(app!(alloc, t2, t1)), #[precedence(level="7")] #[assoc(side="left")] InfixBOpApp, #[precedence(level="8")] #[assoc(side="left")] InfixBOpApp, - > "!=" > => + > "!=" > => UniTerm::from( - mk_term::op1(UnaryOp::BoolNot, Term::Op2(BinaryOp::Eq, t1, t2)) + primop_app!( + alloc, + PrimOp::BoolNot, + primop_app!(alloc, PrimOp::Eq, t1, t2), + ) ), #[precedence(level="9")] #[assoc(side="left")] @@ -1312,87 +1363,77 @@ InfixExpr: UniTerm = { #[precedence(level="11")] #[assoc(side="right")] > "->" > => - UniTerm::from(Type::from(TypeF::Arrow(Box::new(s), Box::new(t)))), + UniTerm::from(Type::from(TypeF::Arrow(alloc.type_move(s), alloc.type_move(t)))), } -BOpPre: BinaryOp = { - "contract/apply" => BinaryOp::ContractApply, - "contract/check" => BinaryOp::ContractCheck, - "contract/array_lazy_app" => BinaryOp::ContractArrayLazyApp, - "contract/record_lazy_app" => BinaryOp::ContractRecordLazyApp, - "unseal" => BinaryOp::Unseal, - "seal" => BinaryOp::Seal, - "label/go_field" => BinaryOp::LabelGoField, - "record/has_field" => BinaryOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), - "record/has_field_with_opts" => BinaryOp::RecordHasField(RecordOpKind::ConsiderAllFields), - "record/field_is_defined" => BinaryOp::RecordFieldIsDefined(RecordOpKind::IgnoreEmptyOpt), - "record/field_is_defined_with_opts" => BinaryOp::RecordFieldIsDefined(RecordOpKind::ConsiderAllFields), - "array/at" => BinaryOp::ArrayAt, - "hash" => BinaryOp::Hash, - "serialize" => BinaryOp::Serialize, - "deserialize" => BinaryOp::Deserialize, - "number/arctan2" => BinaryOp::NumberArcTan2, - "number/log" => BinaryOp::NumberLog, - "pow" => BinaryOp::Pow, - "string/split" => BinaryOp::StringSplit, - "string/contains" => BinaryOp::StringContains, - "string/compare" => BinaryOp::StringCompare, - "record/insert" => BinaryOp::RecordInsert { - ext_kind: RecordExtKind::WithValue, - metadata: Default::default(), - pending_contracts: Default::default(), - op_kind: RecordOpKind::IgnoreEmptyOpt, - }, - "record/insert_with_opts" => BinaryOp::RecordInsert { - ext_kind: RecordExtKind::WithValue, - metadata: Default::default(), - pending_contracts: Default::default(), - op_kind: 
RecordOpKind::ConsiderAllFields, - }, - "record/remove" => BinaryOp::RecordRemove(RecordOpKind::IgnoreEmptyOpt), - "record/remove_with_opts" => BinaryOp::RecordRemove(RecordOpKind::ConsiderAllFields), - "record/split_pair" => BinaryOp::RecordSplitPair, - "record/disjoint_merge" => BinaryOp::RecordDisjointMerge, - "label/with_message" => BinaryOp::LabelWithMessage, - "label/with_notes" => BinaryOp::LabelWithNotes, - "label/append_note" => BinaryOp::LabelAppendNote, - "label/lookup_type_variable" => BinaryOp::LabelLookupTypeVar, +BOpPre: PrimOp = { + "contract/apply" => PrimOp::ContractApply, + "contract/check" => PrimOp::ContractCheck, + "contract/array_lazy_app" => PrimOp::ContractArrayLazyApp, + "contract/record_lazy_app" => PrimOp::ContractRecordLazyApp, + "unseal" => PrimOp::Unseal, + "seal" => PrimOp::Seal, + "label/go_field" => PrimOp::LabelGoField, + "record/has_field" => PrimOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), + "record/has_field_with_opts" => PrimOp::RecordHasField(RecordOpKind::ConsiderAllFields), + "record/field_is_defined" => PrimOp::RecordFieldIsDefined(RecordOpKind::IgnoreEmptyOpt), + "record/field_is_defined_with_opts" => PrimOp::RecordFieldIsDefined(RecordOpKind::ConsiderAllFields), + "array/at" => PrimOp::ArrayAt, + "hash" => PrimOp::Hash, + "serialize" => PrimOp::Serialize, + "deserialize" => PrimOp::Deserialize, + "number/arctan2" => PrimOp::NumberArcTan2, + "number/log" => PrimOp::NumberLog, + "pow" => PrimOp::Pow, + "string/split" => PrimOp::StringSplit, + "string/contains" => PrimOp::StringContains, + "string/compare" => PrimOp::StringCompare, + "record/insert" => PrimOp::RecordInsert(RecordOpKind::IgnoreEmptyOpt), + "record/insert_with_opts" => PrimOp::RecordInsert(RecordOpKind::ConsiderAllFields), + "record/remove" => PrimOp::RecordRemove(RecordOpKind::IgnoreEmptyOpt), + "record/remove_with_opts" => PrimOp::RecordRemove(RecordOpKind::ConsiderAllFields), + "record/split_pair" => PrimOp::RecordSplitPair, + "record/disjoint_merge" => PrimOp::RecordDisjointMerge, + "label/with_message" => PrimOp::LabelWithMessage, + "label/with_notes" => PrimOp::LabelWithNotes, + "label/append_note" => PrimOp::LabelAppendNote, + "label/lookup_type_variable" => PrimOp::LabelLookupTypeVar, } -NOpPre: UniTerm = { +NOpPre: UniTerm<'ast> = { "string/replace" => - UniTerm::from(mk_opn!(NAryOp::StringReplace, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::StringReplace, t1, t2, t3)), "string/replace_regex" => - UniTerm::from(mk_opn!(NAryOp::StringReplaceRegex, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::StringReplaceRegex, t1, t2, t3)), "string/substr" => - UniTerm::from(mk_opn!(NAryOp::StringSubstr, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::StringSubstr, t1, t2, t3)), "record/seal_tail" => - UniTerm::from(mk_opn!(NAryOp::RecordSealTail, t1, t2, t3, t4)), + UniTerm::from(primop_app!(alloc, PrimOp::RecordSealTail, t1, t2, t3, t4)), "record/unseal_tail" => - UniTerm::from(mk_opn!(NAryOp::RecordUnsealTail, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::RecordUnsealTail, t1, t2, t3)), "label/insert_type_variable" => - UniTerm::from(mk_opn!(NAryOp::LabelInsertTypeVar, key, pol, label)), + UniTerm::from(primop_app!(alloc, PrimOp::LabelInsertTypeVar, key, pol, label)), "array/slice" => - UniTerm::from(mk_opn!(NAryOp::ArraySlice, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::ArraySlice, t1, t2, t3)), "record/merge_contract" => - UniTerm::from(mk_opn!(NAryOp::MergeContract, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, 
PrimOp::MergeContract, t1, t2, t3)), } -TypeBuiltin: Type = { +TypeBuiltin: TypeUnr<'ast> = { "Dyn" => Type::from(TypeF::Dyn), "Number" => Type::from(TypeF::Number), "Bool" => Type::from(TypeF::Bool), "String" => Type::from(TypeF::String), } -TypeEnumRow: EnumRow = )?> => { +TypeEnumRow: EnumRow<'ast> = )?> => { EnumRow { id, - typ: typ.map(Box::new), + typ: typ.map(|ty| alloc.type_move(ty)), } }; -TypeEnum: Type = "[|" ",")*> )?> )?> "|]" => { +TypeEnum: TypeUnr<'ast> = "[|" ",")*> )?> )?> "|]" => { let ty = rows.into_iter() .chain(last.into_iter()) // As we build row types as a linked list via a fold on the original @@ -1415,13 +1456,13 @@ TypeEnum: Type = "[|" ",")*> )?> = { , , - "{" "_" ":" > "}" => { + "{" "_" ":" "}" => { Type::from(TypeF::Dict { type_fields: Box::new(t), flavour: DictTypeFlavour::Type @@ -1440,16 +1481,17 @@ TypeAtom: Type = { // right away inside the dictionary contract (before the enclosing `forall` // is fixed) will indeed turn it into a term variable, and raise an unbound // type variable error. - "{" "_" "|" > "}" => { - Type::from(TypeF::Dict { + "{" "_" "|" "}" => { + TypeF::Dict { type_fields: Box::new(t), flavour: DictTypeFlavour::Contract - }) + } }, "_" => { let id = *next_wildcard_id; *next_wildcard_id += 1; - Type::from(TypeF::Wildcard(id)) + + TypeF::Wildcard(id) }, } diff --git a/core/src/parser/mod.rs b/core/src/parser/mod.rs index d0a7279a3c..6ca0d21112 100644 --- a/core/src/parser/mod.rs +++ b/core/src/parser/mod.rs @@ -1,9 +1,8 @@ +use crate::bytecode::ast::{typ::Type, Ast}; use crate::error::{ParseError, ParseErrors}; use crate::files::FileId; use crate::identifier::LocIdent; use crate::position::RawSpan; -use crate::term::RichTerm; -use crate::typ::Type; use lalrpop_util::lalrpop_mod; lalrpop_mod!( @@ -30,9 +29,9 @@ mod tests; /// nickel>foo /// 1 /// ``` -pub enum ExtendedTerm { - RichTerm(RichTerm), - ToplevelLet(LocIdent, RichTerm), +pub enum ExtendedTerm<'ast> { + Expr(Ast<'ast>), + ToplevelLet(LocIdent, Ast<'ast>), } // The interface of LALRPOP-generated parsers, for each public rule. This trait is used as a facade diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 58ac82960c..7aa8f624f6 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -5,23 +5,20 @@ use indexmap::{map::Entry, IndexMap}; use utils::{build_record, FieldDef, FieldPathElem}; use crate::{ + bytecode::ast::{ + record::{Field, FieldMetadata}, + typ::{EnumRow, EnumRows, RecordRow, RecordRows, Type}, + Annotation, Ast, AstAlloc, MergePriority, Node, + }, environment::Environment, identifier::Ident, position::{RawSpan, TermPos}, - term::{ - record::{Field, FieldMetadata, RecordAttrs}, - LabeledType, MergePriority, RichTerm, Term, TypeAnnotation, - }, - typ::{ - DictTypeFlavour, EnumRows, EnumRowsF, RecordRow, RecordRows, RecordRowsF, Type, TypeF, - VarKind, - }, + typ::{DictTypeFlavour, EnumRowsF, RecordRowsF, TypeF, VarKind}, }; use std::{ cell::RefCell, collections::{HashMap, HashSet}, - convert::TryFrom, }; /// A node of the uniterm AST. We only define new variants for those constructs that are common to @@ -48,25 +45,25 @@ use std::{ /// it here). If, on the other hand, we enter the rule for an infix operator as in `a + 1`, `a` will /// be converted to a `Term::Var` and the resulting uniterm will be /// `UniTermNode::Term(Term::Op2(..))`. -pub enum UniTermNode { +pub enum UniTermNode<'ast> { /// A variable. Can refer both to a term variable or a type variable. Var(LocIdent), /// A record. 
Can refer both to a record literal or a record type. - Record(UniRecord), + Record(UniRecord<'ast>), /// A uniterm that has been determined to be a term. - Term(RichTerm), + Term(Ast<'ast>), /// A uniterm that has been determined to be a type. - Type(Type), + Type(Type<'ast>), } /// A uniterm with positional information. -pub struct UniTerm { - node: UniTermNode, +pub struct UniTerm<'ast> { + node: UniTermNode<'ast>, pos: TermPos, } -impl From for UniTerm { - fn from(node: UniTermNode) -> Self { +impl<'ast> From> for UniTerm<'ast> { + fn from(node: UniTermNode<'ast>) -> Self { UniTerm { node, pos: TermPos::None, @@ -74,96 +71,107 @@ impl From for UniTerm { } } -impl UniTerm { +impl<'ast> UniTerm<'ast> { pub fn with_pos(mut self, pos: TermPos) -> Self { self.pos = pos; self } } +trait TryFromUni<'ast, T> +where + Self: Sized, +{ + type Error; + + fn try_from_uni(alloc: &'ast AstAlloc, uni: T) -> Result; +} + // For nodes such as `Type` or `Record`, the following implementation has to choose between two // positions to use: the one of the wrapping `UniTerm`, and the one stored inside the `RichTerm` or -// the `Type`. This implementation assumes that the latest set is the one of `UniTerm`, which is the -// single source of truth. -impl TryFrom for Type { +// the `Type`. This implementation assumes that the latest set is the one of `UniTerm`, which is +// the single source of truth. +impl<'ast> TryFromUni<'ast, UniTerm<'ast>> for Type<'ast> { type Error = ParseError; - fn try_from(ut: UniTerm) -> Result { - let ty_without_pos = match ut.node { - UniTermNode::Var(id) => Type::from(TypeF::Var(id.ident())), - UniTermNode::Record(r) => Type::try_from(r)?, - UniTermNode::Type(ty) => ty, - UniTermNode::Term(rt) => { + fn try_from_uni(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { + let pos = ut.pos; + + let typ = match ut.node { + UniTermNode::Var(id) => TypeF::Var(id.ident()), + UniTermNode::Record(r) => Type::try_from_uni(alloc, r)?.typ, + UniTermNode::Type(ty) => ty.typ, + UniTermNode::Term(ast) => { if matches!( - rt.as_ref(), - Term::Null - | Term::Bool(_) - | Term::Num(_) - | Term::Str(_) - | Term::Array(..) - | Term::Enum(_) - | Term::EnumVariant { .. } - | Term::StrChunks(..) + ast.node, + Node::Null + | Node::Bool(_) + | Node::Number(_) + | Node::String(_) + | Node::Array(_) + | Node::EnumVariant { .. 
} + | Node::StringChunks(_) ) { //unwrap(): uniterms are supposed to come from the parser, and thus have a //well-defined position return Err(ParseError::InvalidContract(ut.pos.unwrap())); } - Type::from(TypeF::Contract(rt)) + TypeF::Contract(alloc.ast(ast)) } }; - Ok(ty_without_pos.with_pos(ut.pos)) + Ok(Type { typ, pos }) } } -impl TryFrom for RichTerm { +impl<'ast> TryFromUni<'ast, UniTerm<'ast>> for Ast<'ast> { type Error = ParseError; - fn try_from(ut: UniTerm) -> Result { + fn try_from_uni(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { let UniTerm { node, pos } = ut; - let rt = match node { - UniTermNode::Var(id) => RichTerm::new(Term::Var(id), pos), - UniTermNode::Record(r) => RichTerm::try_from(r)?, - UniTermNode::Type(mut typ) => { - typ.fix_type_vars(pos.unwrap())?; - if let TypeF::Contract(rt) = typ.typ { - rt.with_pos(pos) - } else { - let contract = typ - .contract() - .map_err(|err| ParseError::UnboundTypeVariables(vec![err.0]))?; - RichTerm::new(Term::Type { typ, contract }, pos) + let node = match node { + UniTermNode::Var(id) => Node::Var(id), + UniTermNode::Record(r) => Ast::try_from_uni(alloc, r)?.node, + UniTermNode::Type(typ) => { + let typ = typ.fix_type_vars(alloc, pos.unwrap())?; + + if let TypeF::Contract(ctr) = typ.typ { + ctr.node.clone() + } else { + alloc.typ(typ) } } - UniTermNode::Term(rt) => rt, + UniTermNode::Term(ast) => ast.node, }; - Ok(rt.with_pos(pos)) + Ok(Ast { node, pos }) } } -impl From for UniTerm { - fn from(rt: RichTerm) -> Self { - let pos = rt.pos; +impl<'ast> From> for UniTerm<'ast> { + fn from(ast: Ast<'ast>) -> Self { + let pos = ast.pos; UniTerm { - node: UniTermNode::Term(rt), + node: UniTermNode::Term(ast), pos, } } } -impl From for UniTerm { - fn from(t: Term) -> Self { - Self::from(RichTerm::from(t)) +impl<'ast> From> for UniTerm<'ast> { + fn from(node: Node<'ast>) -> Self { + UniTerm { + node: UniTermNode::Term(node.into()), + pos: TermPos::None, + } } } -impl From for UniTerm { - fn from(ty: Type) -> Self { +impl<'ast> From> for UniTerm<'ast> { + fn from(ty: Type<'ast>) -> Self { let pos = ty.pos; UniTerm { node: UniTermNode::Type(ty), @@ -172,8 +180,8 @@ impl From for UniTerm { } } -impl From for UniTerm { - fn from(ur: UniRecord) -> Self { +impl<'ast> From> for UniTerm<'ast> { + fn from(ur: UniRecord<'ast>) -> Self { let pos = ur.pos; UniTerm { @@ -185,17 +193,17 @@ impl From for UniTerm { /// A record in the `UniTerm` syntax. #[derive(Clone)] -pub struct UniRecord { - pub fields: Vec, - pub tail: Option<(RecordRows, TermPos)>, - pub attrs: RecordAttrs, +pub struct UniRecord<'ast> { + pub fields: Vec>, + pub tail: Option<(RecordRows<'ast>, TermPos)>, + pub open: bool, pub pos: TermPos, /// The position of the final ellipsis `..`, if any. Used for error reporting. `pos_ellipsis` /// must be different from `TermPos::None` if and only if `attrs.open` is `true`. pub pos_ellipsis: TermPos, } -impl UniRecord { +impl<'ast> UniRecord<'ast> { /// Check if a field definition has a type annotation but no definition. This is currently /// forbidden for record literals that aren't record types. In that case, raise the /// corresponding parse error. @@ -246,11 +254,7 @@ impl UniRecord { value: None, metadata: FieldMetadata { - annotation: - TypeAnnotation { - typ: Some(labeled_ty), - .. - }, + annotation: Annotation { typ: Some(typ), .. }, .. }, .. 
@@ -269,7 +273,7 @@ impl UniRecord { Entry::Vacant(vacant_entry) => { vacant_entry.insert(FieldState::Candidate(( ident.pos.unwrap(), - labeled_ty.label.span, + typ.pos.unwrap(), ))); None } @@ -277,7 +281,7 @@ impl UniRecord { } // We don't do anything smart for composite paths: we raise an error right way else { - Some((field_def.pos.unwrap(), labeled_ty.label.span)) + Some((field_def.pos.unwrap(), typ.pos.unwrap())) } } field => { @@ -324,7 +328,7 @@ impl UniRecord { FieldMetadata { doc: None, annotation: - TypeAnnotation { + Annotation { typ: Some(_), contracts, }, @@ -332,9 +336,6 @@ impl UniRecord { not_exported: false, priority: MergePriority::Neutral, }, - // At this stage, this field should always be empty. It's a run-time thing, and - // is only filled during program transformation. - pending_contracts: _, } if contracts.is_empty()) }) } @@ -342,12 +343,16 @@ impl UniRecord { /// A plain record type, uniquely containing fields of the form `fields: /// Type`. Currently, this doesn't support the field path syntax: /// `{foo.bar.baz : Type}.into_type_strict()` returns an `Err`. - pub fn into_type_strict(self) -> Result { - fn term_to_record_rows( + pub fn into_type_strict( + self, + alloc: &'ast AstAlloc, + ) -> Result, InvalidRecordTypeError> { + fn term_to_record_rows<'ast>( + alloc: &'ast AstAlloc, id: LocIdent, - field_def: FieldDef, - tail: RecordRows, - ) -> Result { + field_def: FieldDef<'ast>, + tail: RecordRows<'ast>, + ) -> Result, InvalidRecordTypeError> { // At parsing stage, all `Rc`s must be 1-counted. We can thus call // `into_owned()` without risking to actually clone anything. match field_def.field { @@ -359,23 +364,20 @@ impl UniRecord { FieldMetadata { doc: None, annotation: - TypeAnnotation { - typ: Some(labeled_ty), + Annotation { + typ: Some(typ), contracts, }, opt: false, not_exported: false, priority: MergePriority::Neutral, }, - // At this stage, this field should always be empty. It's a run-time thing, and - // is only filled during program transformation. - pending_contracts: _, } if contracts.is_empty() => Ok(RecordRows(RecordRowsF::Extend { row: RecordRow { id, - typ: Box::new(labeled_ty.typ), + typ: alloc.type_data(typ.typ, typ.pos), }, - tail: Box::new(tail), + tail: alloc.record_rows(tail.0), })), _ => { Err(InvalidRecordTypeError::InvalidField( @@ -388,7 +390,7 @@ impl UniRecord { // An open record (with an ellipsis `..` at the end) can't be translated to a record type. // `pos_ellipsis` should be set iff `attrs.open` is true. - debug_assert!((self.pos_ellipsis == TermPos::None) != self.attrs.open); + debug_assert!((self.pos_ellipsis == TermPos::None) != self.open); if let Some(raw_span) = self.pos_ellipsis.into_opt() { return Err(InvalidRecordTypeError::IsOpen(raw_span)); @@ -429,7 +431,7 @@ impl UniRecord { let id = match elem { FieldPathElem::Ident(id) => id, FieldPathElem::Expr(expr) => { - let name = expr.term.as_ref().try_str_chunk_as_static_str().ok_or( + let name = expr.node.try_str_chunk_as_static_str().ok_or( InvalidRecordTypeError::InterpolatedField( field_def.pos.unwrap(), ), @@ -445,7 +447,7 @@ impl UniRecord { }); } - term_to_record_rows(id, field_def, acc) + term_to_record_rows(alloc, id, field_def, acc) } }, )?; @@ -461,7 +463,7 @@ impl UniRecord { } } -impl TryFrom for RichTerm { +impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Ast<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a term. 
If the `UniRecord` is syntactically a record type or it @@ -475,60 +477,55 @@ impl TryFrom for RichTerm { /// /// We also fix the type variables of the type appearing inside annotations (see in-code /// documentation of the private symbol `FixTypeVars::fix_type_vars`). - fn try_from(ur: UniRecord) -> Result { + fn try_from_uni(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { let pos = ur.pos; // First try to interpret this record as a type. - let result = if ur.tail.is_some() || (ur.is_record_type() && !ur.fields.is_empty()) { + if ur.tail.is_some() || (ur.is_record_type() && !ur.fields.is_empty()) { let tail_span = ur.tail.as_ref().and_then(|t| t.1.into_opt()); // We unwrap all positions: at this stage of the parsing, they must all be set - let mut typ = ur - .into_type_strict() - .map_err(|cause| ParseError::InvalidRecordType { - tail_span, - record_span: pos.unwrap(), - cause, - })?; + let typ = + ur.into_type_strict(alloc) + .map_err(|cause| ParseError::InvalidRecordType { + tail_span, + record_span: pos.unwrap(), + cause, + })?; - typ.fix_type_vars(pos.unwrap())?; - let contract = typ - .contract() - .map_err(|err| ParseError::UnboundTypeVariables(vec![err.0]))?; + let typ = typ.fix_type_vars(alloc, pos.unwrap())?; - Ok(RichTerm::new(Term::Type { typ, contract }, pos)) + Ok(alloc.typ(typ).spanned(pos)) } else { ur.check_typed_field_without_def()?; - let UniRecord { fields, attrs, .. } = ur; + let UniRecord { fields, open, .. } = ur; let elaborated = fields .into_iter() .map(|mut field_def| { - fix_field_types(&mut field_def.field.metadata, field_def.pos.unwrap())?; - Ok(field_def.elaborate()) + field_def.field.metadata = + fix_field_types(alloc, field_def.field.metadata, field_def.pos.unwrap())?; + Ok(field_def.elaborate(alloc)) }) .collect::, _>>()?; - let record_term = RichTerm::from(build_record(elaborated, attrs)); - Ok(record_term) - }; - - result.map(|rt| rt.with_pos(pos)) + Ok(build_record(alloc, elaborated, open).spanned(pos)) + } } } /// Try to convert a `UniRecord` to a type. The strict part means that the `UniRecord` must be -impl TryFrom for Type { +impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Type<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a type. If the `UniRecord` has a tail, it is interpreted strictly /// as a type and fail if it isn't a plain record type. Otherwise, we first try to interpret it /// as a plain record type, and if that doesn't work, we interpret it as a term and wrap it /// back as a user-defined contract. 
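A hedged usage sketch of this dual interpretation, as it would appear inside a parser rule (assuming a parsed `ur: UniRecord` and an `alloc: &AstAlloc` in scope; not code from this patch):

```rust
// `{ foo : Number }` only declares typed fields without definitions:
// `into_type_strict` succeeds and we get a genuine record type back.
//
// `{ foo = 1 }` has a field definition: `into_type_strict` fails, the
// record is elaborated as a term instead, and the result is wrapped back
// as a `TypeF::Contract`.
let typ: Type<'_> = Type::try_from_uni(alloc, ur)?;
```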
- fn try_from(ur: UniRecord) -> Result { + fn try_from_uni(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { let pos = ur.pos; if let Some((_, tail_pos)) = ur.tail { - ur.into_type_strict() + ur.into_type_strict(alloc) .map_err(|cause| ParseError::InvalidRecordType { tail_span: tail_pos.into_opt(), record_span: pos.unwrap(), @@ -536,9 +533,9 @@ impl TryFrom for Type { }) } else { let pos = ur.pos; - ur.clone().into_type_strict().or_else(|_| { - RichTerm::try_from(ur).map(|rt| Type { - typ: TypeF::Contract(rt), + ur.clone().into_type_strict(alloc).or_else(|_| { + Ast::try_from_uni(alloc, ur).map(|ast| Type { + typ: TypeF::Contract(alloc.ast(ast)), pos, }) }) @@ -616,10 +613,13 @@ impl VarKindCell { } } -pub(super) trait FixTypeVars { +pub(super) trait FixTypeVars<'ast> +where + Self: Sized, +{ /// Post-process a type at the right hand side of an annotation by replacing each unbound type /// variable `TypeF::Var(id)` by a term variable with the same identifier seen as a custom - /// contract `TypeF::Contract(Term::Var(id))`. + /// contract `TypeF::Contract(Node::Var(id))`. /// /// Additionally, this passes determine the kind of a variable introduced by a forall binder. /// @@ -629,8 +629,8 @@ pub(super) trait FixTypeVars { /// variables occurring in types, we often can't know right away if such a variable occurrence /// will eventually be a type variable or a term variable seen as a custom contract. /// - /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to a - /// contract (e.g. in `x | a -> b`) or a type variable (e.g. in `x | forall a b. a -> b`), + /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to + /// a contract (e.g. in `x | a -> b`) or type variables (e.g. in `x | forall a b. a -> b`), /// depending on enclosing `forall`s. To handle both cases, we initially parse all variables /// inside types as type variables. When reaching the right-hand side of an annotation, because /// `forall`s can only bind locally in a type, we can then decide the actual nature of each @@ -638,9 +638,10 @@ pub(super) trait FixTypeVars { /// that are not actually bound by a `forall` to be term variables. This is the role of /// `fix_type_vars()`. /// - /// Once again because `forall`s only bind variables locally, and don't bind inside contracts, - /// we don't have to recurse into contracts and this pass will only visit each node of the AST - /// at most once in total (and most probably much less so). + /// Since `forall`s only bind type variables locally and cross contract boundaries, we don't + /// have to recurse into contracts and this pass will only visit each node of the AST at most + /// once in total (and most probably much less so). In some sense, we just visit the type + /// layer, or type spine, composed only of type constructors. /// /// There is one subtlety with unirecords, though. A unirecord can still be in interpreted as a /// record type later. Take the following example: @@ -675,26 +676,72 @@ pub(super) trait FixTypeVars { /// # this is inconsistent and will raise a parse error /// forall a. [| 'foo, 'bar; a |] -> {foo : Str, bar: Str; a} /// ``` - fn fix_type_vars(&mut self, span: RawSpan) -> Result<(), ParseError> { - self.fix_type_vars_env(BoundVarEnv::new(), span) + fn fix_type_vars(self, alloc: &'ast AstAlloc, span: RawSpan) -> Result { + Ok(self + .fix_type_vars_env(alloc, BoundVarEnv::new(), span)? 
+            .unwrap_or(self))
+    }
+
+    /// Same as [Self::fix_type_vars], but takes `self` by reference instead, and returns
+    /// `Ok(None)` when `self` hasn't been modified by the type fixing phase, or
+    /// `Ok(Some(new_self))` with a modified, owned `self` upon change.
+    fn fix_type_vars_ref(
+        &self,
+        alloc: &'ast AstAlloc,
+        span: RawSpan,
+    ) -> Result<Option<Self>, ParseError> {
+        self.fix_type_vars_env(alloc, BoundVarEnv::new(), span)
     }
 
     /// Fix type vars in a given environment of variables bound by foralls enclosing this type. The
     /// environment maps bound variables to a reference to the variable kind of the corresponding
     /// forall.
+    ///
+    /// # Ownership
+    ///
+    /// [Self::fix_type_vars_env] might need to be called both on owned data and on immutably
+    /// borrowed data (e.g. [`Type`][crate::bytecode::ast::typ::Type] and [`&'ast
+    /// Type`][crate::bytecode::ast::typ::Type]). We don't want to duplicate the logic of
+    /// [Self::fix_type_vars_env] for both, as we can't write one version that is generic enough
+    /// and properly avoids useless allocations.
+    ///
+    /// The idea of the current API is that even when operating on owned data, `self` is taken by
+    /// reference. If `self` isn't modified by the type fixing phase, then `None` is returned and
+    /// the caller can just reuse the original `self` how they please.
+    ///
+    /// If `self` has been modified by the type fixing phase, then `Some(new_value)` is returned
+    /// with a new owned version of `self`. If the caller needed an owned version, the job is
+    /// done. Otherwise, the caller can use [the ast allocator `alloc`][crate::bytecode::ast::AstAlloc]
+    /// to move the owned data into the allocator and get an `&'ast` reference out of it. The only
+    /// cost is that for owned data, we could have reused the original `self` instead of returning
+    /// a new one, but this is a detail: in practice only the top-level call is performed on owned
+    /// data, and the recursive calls are all performed on `&'ast` references. At worst, we waste
+    /// the top-level node, which is stack-allocated anyway.
+    ///
+    /// Because allocated AST nodes are immutable and can't be reclaimed until the whole AST is
+    /// finally transformed to either the mainline AST or (in the future) to bytecode, we want to
+    /// avoid reconstructing useless copies of nodes, which is precisely what the
+    /// `Ok(None)`/`Ok(Some(_))` return convention makes possible.
     fn fix_type_vars_env(
-        &mut self,
+        &self,
+        alloc: &'ast AstAlloc,
         bound_vars: BoundVarEnv,
         span: RawSpan,
-    ) -> Result<(), ParseError>;
+    ) -> Result<Option<Self>, ParseError>;
 }
 
-impl FixTypeVars for Type {
+impl<'ast> FixTypeVars<'ast> for Type<'ast> {
     fn fix_type_vars_env(
-        &mut self,
+        &self,
+        alloc: &'ast AstAlloc,
         mut bound_vars: BoundVarEnv,
         span: RawSpan,
-    ) -> Result<(), ParseError> {
+    ) -> Result<Option<Self>, ParseError> {
+        use crate::bytecode::ast::typ::TypeUnr;
+
+        let pos = self.pos;
+
+        let build_fixed = |new_type: TypeUnr<'ast>| -> Self { Type { typ: new_type, pos } };
+
         match self.typ {
             TypeF::Dyn
             | TypeF::Number
@@ -708,11 +755,20 @@ impl FixTypeVars for Type {
             // particular mustn't be allowed to capture type variables from the enclosing type: see
             // https://github.com/tweag/nickel/issues/1228.
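The ownership discipline described above boils down to a small reusable pattern. A self-contained miniature, purely illustrative and not code from this patch:

```rust
// A fixing pass takes `&T` and reports `Ok(None)` when nothing changed, so
// callers keep the original value (or the original `&'ast` reference) and
// only pay for an allocation when a subtree was actually rewritten.
fn fix_or_keep<T, E>(value: T, pass: impl Fn(&T) -> Result<Option<T>, E>) -> Result<T, E> {
    // Mirrors the `self.fix_type_vars_env(..)?.unwrap_or(self)` call above.
    Ok(pass(&value)?.unwrap_or(value))
}
```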
| TypeF::Dict { flavour: DictTypeFlavour::Contract, ..} - | TypeF::Wildcard(_) => Ok(()), - TypeF::Arrow(ref mut s, ref mut t) => { - (*s).fix_type_vars_env(bound_vars.clone(), span)?; - (*t).fix_type_vars_env(bound_vars, span)?; - Ok(()) + | TypeF::Wildcard(_) => Ok(None), + TypeF::Arrow(src, tgt) => { + let src_result = src.fix_type_vars_env(alloc, bound_vars.clone(), span)?; + let tgt_result = tgt.fix_type_vars_env(alloc, bound_vars, span)?; + + if src_result.is_some() || tgt_result.is_some() { + let src = src_result.map(|new_src| alloc.type_move(new_src)).unwrap_or(src); + let tgt = tgt_result.map(|new_tgt| alloc.type_move(new_tgt)).unwrap_or(tgt); + + Ok(Some(build_fixed(TypeF::Arrow(src, tgt)))) + } + else { + Ok(None) + } } TypeF::Var(sym) => { if let Some(cell) = bound_vars.get(&sym) { @@ -721,155 +777,265 @@ impl FixTypeVars for Type { ty_var: LocIdent::from(sym).with_pos(self.pos), span })?; + + Ok(None) } else { let id = LocIdent::from(sym).with_pos(self.pos); - self.typ = TypeF::Contract(RichTerm::new(Term::Var(id), id.pos)); + + Ok(Some(build_fixed(TypeF::Contract(alloc.ast(Ast { + node: Node::Var(id), + pos: id.pos, + }))))) } - Ok(()) } TypeF::Forall { - ref var, - ref mut var_kind, - ref mut body, + var, + var_kind: _, + body, } => { // We span a new VarKindCell and put it in the environment. The recursive calls to // fix_type_vars will fill this cell with the correct kind, which we get afterwards // to set the right value for `var_kind`. bound_vars.insert(var.ident(), VarKindCell::new()); -// let x : forall a. { _foo: forall a. a, bar: { ; a } } - (*body).fix_type_vars_env(bound_vars.clone(), span)?; - // unwrap(): We just inserted a value for `var` above, and environment can never + let body = body.fix_type_vars_env(alloc, bound_vars.clone(), span)?; + // unwrap(): we just inserted a value for `var` above, and environment can never // delete values. - // take_var_kind(): Once we leave the body of this forall, we no longer need + // take_var_kind(): once we leave the body of this forall, we no longer need // access to this VarKindCell in bound_vars. We can avoid a clone by taking // the var_kind out. We could also take the whole key value pair out of the // `Environment`, but ownership there is trickier. 
-                *var_kind = bound_vars
+                let var_kind = bound_vars
                     .get(&var.ident())
                     .unwrap()
                     .take_var_kind()
                     .unwrap_or_default();
 
-                Ok(())
+                Ok(body.map(|body| {
+                    build_fixed(TypeF::Forall {
+                        var,
+                        var_kind,
+                        body: alloc.type_move(body),
+                    })
+                }))
             }
             TypeF::Dict {
-                type_fields: ref mut ty,
-                flavour: DictTypeFlavour::Type
-            } | TypeF::Array(ref mut ty) => {
-                (*ty).fix_type_vars_env(bound_vars, span)
+                type_fields,
+                flavour: flavour @ DictTypeFlavour::Type
+            } => {
+                Ok(type_fields.fix_type_vars_env(alloc, bound_vars, span)?.map(|ty| {
+                    build_fixed(TypeF::Dict {
+                        type_fields: alloc.type_move(ty),
+                        flavour,
+                    })
+                }))
+            }
+            TypeF::Array(ty) => {
+                Ok(ty.fix_type_vars_env(alloc, bound_vars, span)?.map(|ty|
+                    build_fixed(TypeF::Array(alloc.type_move(ty)))))
+            }
+            TypeF::Enum(ref erows) => {
+                Ok(erows.fix_type_vars_env(alloc, bound_vars, span)?.map(|erows|
+                    build_fixed(TypeF::Enum(erows))
+                ))
+            }
+            TypeF::Record(ref rrows) => {
+                Ok(rrows.fix_type_vars_env(alloc, bound_vars, span)?.map(|rrows|
+                    build_fixed(TypeF::Record(rrows))
+                ))
             }
-            TypeF::Enum(ref mut erows) => erows.fix_type_vars_env(bound_vars, span),
-            TypeF::Record(ref mut rrows) => rrows.fix_type_vars_env(bound_vars, span),
         }
     }
 }
 
-impl FixTypeVars for RecordRows {
+impl<'ast> FixTypeVars<'ast> for RecordRows<'ast> {
     fn fix_type_vars_env(
-        &mut self,
+        &self,
+        alloc: &'ast AstAlloc,
         bound_vars: BoundVarEnv,
         span: RawSpan,
-    ) -> Result<(), ParseError> {
-        fn helper(
-            rrows: &mut RecordRows,
+    ) -> Result<Option<Self>, ParseError> {
+        fn do_fix<'ast>(
+            rrows: &RecordRows<'ast>,
+            alloc: &'ast AstAlloc,
             bound_vars: BoundVarEnv,
             span: RawSpan,
             mut maybe_excluded: HashSet<Ident>,
-        ) -> Result<(), ParseError> {
+        ) -> Result<Option<RecordRows<'ast>>, ParseError> {
             match rrows.0 {
-                RecordRowsF::Empty => Ok(()),
-                RecordRowsF::TailDyn => Ok(()),
+                RecordRowsF::Empty | RecordRowsF::TailDyn => Ok(None),
                 // We can't have a contract in tail position, so we don't fix `TailVar`. However, we
                 // have to set the correct kind for the corresponding forall binder.
-                RecordRowsF::TailVar(ref id) => {
+                RecordRowsF::TailVar(id) => {
                     if let Some(cell) = bound_vars.get(&id.ident()) {
                         cell.try_set(VarKind::RecordRows {
                             excluded: maybe_excluded,
                         })
-                        .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?;
+                        .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: id, span })?;
                     }
-                    Ok(())
+
+                    Ok(None)
                 }
-                RecordRowsF::Extend {
-                    ref mut row,
-                    ref mut tail,
-                } => {
+                RecordRowsF::Extend { ref row, tail } => {
                     maybe_excluded.insert(row.id.ident());
-                    row.typ.fix_type_vars_env(bound_vars.clone(), span)?;
-                    helper(tail, bound_vars, span, maybe_excluded)
+
+                    let row_fixed = row.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+                    let tail_fixed = do_fix(tail, alloc, bound_vars, span, maybe_excluded)?;
+
+                    if row_fixed.is_some() || tail_fixed.is_some() {
+                        let row = row_fixed.unwrap_or_else(|| row.clone());
+                        let tail = tail_fixed
+                            .map(|tail_fixed| alloc.record_rows_move(tail_fixed))
+                            .unwrap_or(tail);
+
+                        Ok(Some(RecordRows(RecordRowsF::Extend { row, tail })))
+                    } else {
+                        Ok(None)
+                    }
                 }
             }
         }
 
-        helper(self, bound_vars, span, HashSet::new())
+        do_fix(self, alloc, bound_vars, span, HashSet::new())
     }
 }
 
+impl<'ast> FixTypeVars<'ast> for RecordRow<'ast> {
+    fn fix_type_vars_env(
+        &self,
+        alloc: &'ast AstAlloc,
+        bound_vars: BoundVarEnv,
+        span: RawSpan,
+    ) -> Result<Option<Self>, ParseError> {
+        Ok(self
+            .typ
+            .fix_type_vars_env(alloc, bound_vars, span)?
+            .map(|typ| RecordRow {
+                id: self.id,
+                typ: alloc.type_move(typ),
+            }))
     }
 }
 
-impl FixTypeVars for EnumRows {
+impl<'ast> FixTypeVars<'ast> for EnumRows<'ast> {
     fn fix_type_vars_env(
-        &mut self,
+        &self,
+        alloc: &'ast AstAlloc,
         bound_vars: BoundVarEnv,
         span: RawSpan,
-    ) -> Result<(), ParseError> {
-        fn do_fix(
-            erows: &mut EnumRows,
+    ) -> Result<Option<Self>, ParseError> {
+        fn do_fix<'ast>(
+            erows: &EnumRows<'ast>,
+            alloc: &'ast AstAlloc,
             bound_vars: BoundVarEnv,
             span: RawSpan,
             mut maybe_excluded: HashSet<Ident>,
-        ) -> Result<(), ParseError> {
+        ) -> Result<Option<EnumRows<'ast>>, ParseError> {
             match erows.0 {
-                EnumRowsF::Empty => Ok(()),
+                EnumRowsF::Empty => Ok(None),
                 // We can't have a contract in tail position, so we don't fix `TailVar`. However, we
                 // have to set the correct kind for the corresponding forall binder.
-                EnumRowsF::TailVar(ref id) => {
+                EnumRowsF::TailVar(id) => {
                     if let Some(cell) = bound_vars.get(&id.ident()) {
                         cell.try_set(VarKind::EnumRows {
                             excluded: maybe_excluded,
                         })
-                        .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?;
+                        .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: id, span })?;
                     }
-                    Ok(())
+
+                    Ok(None)
                 }
-                EnumRowsF::Extend {
-                    ref mut row,
-                    ref mut tail,
-                } => {
-                    if let Some(ref mut typ) = row.typ {
-                        // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them
-                        // for constraints. See the documentation of `typecheck::unif::RowConstrs`.
+                EnumRowsF::Extend { ref row, tail } => {
+                    // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them
+                    // for constraints. See the documentation of `typecheck::unif::RowConstrs`.
+                    if row.typ.is_some() {
                         maybe_excluded.insert(row.id.ident());
-                        typ.fix_type_vars_env(bound_vars.clone(), span)?;
                     }
 
-                    do_fix(tail, bound_vars, span, maybe_excluded)
+                    let row_fixed = row.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+                    let tail_fixed = do_fix(tail, alloc, bound_vars, span, maybe_excluded)?;
+
+                    if row_fixed.is_some() || tail_fixed.is_some() {
+                        let row = row_fixed.unwrap_or_else(|| row.clone());
+                        let tail = tail_fixed
+                            .map(|tail_fixed| alloc.enum_rows_move(tail_fixed))
+                            .unwrap_or(tail);
+
+                        Ok(Some(EnumRows(EnumRowsF::Extend { row, tail })))
+                    } else {
+                        Ok(None)
+                    }
                 }
             }
         }
 
-        do_fix(self, bound_vars, span, HashSet::new())
+        do_fix(self, alloc, bound_vars, span, HashSet::new())
     }
 }
 
-/// Fix the type variables of types appearing as annotations of record fields. See the in-code
-/// documentation of the private symbol `Types::fix_type_vars`.
-pub fn fix_field_types(metadata: &mut FieldMetadata, span: RawSpan) -> Result<(), ParseError> {
-    use std::rc::Rc;
-
-    if let Some(LabeledType {
-        typ: ref mut types, ..
-    }) = metadata.annotation.typ
-    {
-        types.fix_type_vars(span)?;
-    }
-
-    for ctr in metadata.annotation.contracts.iter_mut() {
-        ctr.typ.fix_type_vars(span)?;
+impl<'ast> FixTypeVars<'ast> for EnumRow<'ast> {
+    fn fix_type_vars_env(
+        &self,
+        alloc: &'ast AstAlloc,
+        bound_vars: BoundVarEnv,
+        span: RawSpan,
+    ) -> Result<Option<Self>, ParseError> {
+        // `maybe_fixed` is `Some(ty)` if and only if this enum row has an associated
+        // type *and* the type has been changed by fixing.
+        let maybe_fixed = self
+            .typ
+            .as_ref()
+            .map(|ty| ty.fix_type_vars_env(alloc, bound_vars.clone(), span))
+            .transpose()?
+ .flatten(); - // Although type variables and term variables are currently printed the same, fixing the - // type stored in the label is still better, including to have proper deduplication of - // contracts when pretty printing the result of evaluation back. - ctr.label.typ = Rc::new(ctr.typ.clone()); + Ok(maybe_fixed.map(|typ| EnumRow { + id: self.id, + typ: Some(alloc.type_move(typ)), + })) } +} - Ok(()) +/// Fix the type variables of types appearing as annotations of record fields. See the in-code +/// documentation of the private symbol `Types::fix_type_vars`. +pub fn fix_field_types<'ast>( + alloc: &'ast AstAlloc, + metadata: FieldMetadata<'ast>, + span: RawSpan, +) -> Result, ParseError> { + use std::borrow::Cow; + + let typ = metadata + .annotation + .typ + .map(|typ| typ.fix_type_vars(alloc, span)) + .transpose()?; + + let contracts: Result>, ParseError> = metadata + .annotation + .contracts + .iter() + .map(|ctr| { + Ok(ctr + .fix_type_vars_ref(alloc, span)? + .map(|typ| Cow::Owned(typ)) + .unwrap_or(Cow::Borrowed(ctr))) + }) + .collect(); + let contracts = contracts?; + + // If none of the contracts have been changed, we can keep the original `[Type]` allocation. + let contracts = if contracts.iter().all(|cow| matches!(cow, Cow::Borrowed(_))) { + metadata.annotation.contracts + } else { + alloc.types(contracts.into_iter().map(|cow| cow.into_owned())) + }; + + Ok(FieldMetadata { + annotation: Annotation { typ, contracts }, + ..metadata + }) } diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 0cedb31a61..dfeac36771 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -1,45 +1,45 @@ //! Various helpers and companion code for the parser are put here to keep the grammar definition //! uncluttered. -use indexmap::map::Entry; -use std::ffi::OsString; -use std::rc::Rc; -use std::{collections::HashSet, fmt::Debug}; +use std::{ + ffi::OsString, + iter, + rc::Rc, + {collections::HashSet, fmt::Debug}, +}; -use self::pattern::bindings::Bindings as _; +use indexmap::{map::Entry, IndexMap}; use super::error::ParseError; -use crate::cache::InputFormat; use crate::{ combine::Combine, eval::{ merge::{merge_doc, split}, operation::RecPriority, }, + cache::InputFormat, + combine::CombineAlloc, + eval::merge::{merge_doc, split}, files::FileId, identifier::LocIdent, label::{Label, MergeKind, MergeLabel}, - mk_app, mk_fun, position::{RawSpan, TermPos}, - term::pattern::{Pattern, PatternData}, - term::{ - make as mk_term, - record::{Field, FieldMetadata, RecordAttrs, RecordData}, - *, - }, typ::Type, }; -use malachite::num::conversion::traits::{FromSciString, FromStringBase}; +use malachite::{ + num::conversion::traits::{FromSciString, FromStringBase}, + Integer, +}; pub struct ParseNumberError; -pub fn parse_number_sci(slice: &str) -> Result { - Rational::from_sci_string(slice).ok_or(ParseNumberError) +pub fn parse_number_sci(slice: &str) -> Result { + Number::from_sci_string(slice).ok_or(ParseNumberError) } -pub fn parse_number_base(base: u8, slice: &str) -> Result { - Ok(Rational::from( +pub fn parse_number_base(base: u8, slice: &str) -> Result { + Ok(Number::from( Integer::from_string_base(base, slice).ok_or(ParseNumberError)?, )) } @@ -84,17 +84,17 @@ pub enum StringEndDelimiter { /// Left hand side of a record field declaration. #[derive(Clone, Debug)] -pub enum FieldPathElem { +pub enum FieldPathElem<'ast> { /// A static field declaration: `{ foo = .. }` Ident(LocIdent), /// A quoted field declaration: `{ "%{protocol}" = .. 
}`
     ///
-    /// In practice, the argument must always be `StrChunks`, but since we also need to keep track
+    /// In practice, the argument must always be `StringChunks`, but since we also need to keep track
     /// of the associated span it's handier to just use a `RichTerm`.
-    Expr(RichTerm),
+    Expr(Ast<'ast>),
 }
 
-pub type FieldPath = Vec<FieldPathElem>;
+pub type FieldPath<'ast> = Vec<FieldPathElem<'ast>>;
 
 /// A string chunk literal atom, being either a string or a single char.
 ///
 /// Because of the way the lexer handles escaping and interpolation, a static string can be
 /// broken down into several such atoms.
 #[derive(Clone, Debug)]
 pub enum ChunkLiteralPart {
     Str(String),
     Char(char),
 }
 
 /// A field definition atom. A field is defined by a path, a potential value, and associated
 /// metadata.
 #[derive(Clone, Debug)]
-pub struct FieldDef {
-    pub path: FieldPath,
-    pub field: Field,
+pub struct FieldDef<'ast> {
+    pub path: FieldPath<'ast>,
+    pub field: Field<'ast>,
     pub pos: TermPos,
 }
 
-impl FieldDef {
+impl<'ast> FieldDef<'ast> {
     /// Elaborate a record field definition specified as a path, like `a.b.c = foo`, into a regular
     /// flat definition `a = {b = {c = foo}}`.
     ///
     /// # Preconditions
     /// - /!\ path must be **non-empty**, otherwise this function panics
-    pub fn elaborate(self) -> (FieldPathElem, Field) {
-        let last_ident = self.path.last().and_then(|elem| match elem {
-            FieldPathElem::Ident(id) => Some(*id),
-            FieldPathElem::Expr(_) => None,
-        });
-
+    pub fn elaborate(self, alloc: &'ast AstAlloc) -> (FieldPathElem<'ast>, Field<'ast>) {
         let mut it = self.path.into_iter();
         let fst = it.next().unwrap();
 
-        let content = it
-            .rev()
-            .fold(self.field.with_name(last_ident), |acc, path_elem| {
-                // We first compute a position for the intermediate generated records (it's useful
-                // in particular for the LSP). The position starts at the subpath corresponding to
-                // the intermediate record and ends at the final value.
-                //
-                // unwrap is safe here becuase the initial content has a position, and we make sure
-                // we assign a position for the next field.
-                let pos = match path_elem {
-                    FieldPathElem::Ident(id) => id.pos,
-                    FieldPathElem::Expr(ref expr) => expr.pos,
-                };
-                // unwrap is safe here because every id should have a non-`TermPos::None` position
-                let id_span = pos.unwrap();
-                let acc_span = acc
-                    .value
-                    .as_ref()
-                    .map(|value| value.pos.unwrap())
-                    .unwrap_or(id_span);
-
-                // `RawSpan::fuse` only returns `None` when the two spans are in different files.
-                // A record field and its value *must* be in the same file, so this is safe.
-                let pos = TermPos::Original(id_span.fuse(acc_span).unwrap());
-
-                match path_elem {
-                    FieldPathElem::Ident(id) => {
-                        let mut fields = IndexMap::new();
-                        fields.insert(id, acc);
-                        Field::from(RichTerm::new(
-                            Term::Record(RecordData {
-                                fields,
-                                ..Default::default()
-                            }),
+        let content = it.rev().fold(self.field, |acc, path_elem| {
+            // We first compute a position for the intermediate generated records (it's useful
+            // in particular for the LSP). The position starts at the subpath corresponding to
+            // the intermediate record and ends at the final value.
+            //
+            // unwrap is safe here because the initial content has a position, and we make sure
+            // we assign a position for the next field.
+            let pos = match path_elem {
+                FieldPathElem::Ident(id) => id.pos,
+                FieldPathElem::Expr(ref expr) => expr.pos,
+            };
+            // unwrap is safe here because every id should have a non-`TermPos::None` position
+            let id_span = pos.unwrap();
+            let acc_span = acc
+                .value
+                .as_ref()
+                .map(|value| value.pos.unwrap())
+                .unwrap_or(id_span);
+
+            // `RawSpan::fuse` only returns `None` when the two spans are in different files.
+ // A record field and its value *must* be in the same file, so this is safe. + let pos = TermPos::Original(id_span.fuse(acc_span).unwrap()); + + match path_elem { + FieldPathElem::Ident(id) => Field::from(Ast { + node: Node::Record(alloc.record_data( + iter::once((id, acc)), + iter::empty(), + false, + )), + pos, + }), + FieldPathElem::Expr(exp) => { + let static_access = exp.node.try_str_chunk_as_static_str(); + + if let Some(static_access) = static_access { + let id = LocIdent::new_with_pos(static_access, exp.pos); + Field::from(Ast { + node: Node::Record(alloc.record_data( + iter::once((id, acc)), + iter::empty(), + false, + )), pos, - )) - } - FieldPathElem::Expr(exp) => { - let static_access = exp.term.as_ref().try_str_chunk_as_static_str(); - - if let Some(static_access) = static_access { - let id = LocIdent::new_with_pos(static_access, exp.pos); - let mut fields = IndexMap::new(); - fields.insert(id, acc); - Field::from(RichTerm::new( - Term::Record(RecordData { - fields, - ..Default::default() - }), - pos, - )) - } else { - // The record we create isn't recursive, because it is only comprised of - // one dynamic field. It's just simpler to use the infrastructure of - // `RecRecord` to handle dynamic fields at evaluation time rather than - // right here - Field::from(RichTerm::new( - Term::RecRecord(RecordData::empty(), vec![(exp, acc)], None), - pos, - )) - } + }) + } else { + // The record we create isn't recursive, because it is only comprised of + // one dynamic field. It's just simpler to use the infrastructure of + // `RecRecord` to handle dynamic fields at evaluation time rather than + // right here + Field::from(Ast { + node: Node::Record(alloc.record_data( + std::iter::empty(), + std::iter::once((exp, acc)), + false, + )), + pos, + }) } } - }); + } + }); (fst, content) } @@ -214,60 +207,56 @@ impl FieldDef { /// The last field of a record, that can either be a normal field declaration or an ellipsis. #[derive(Clone, Debug)] -pub enum RecordLastField { - Field(FieldDef), +pub enum RecordLastField<'ast> { + Field(FieldDef<'ast>), Ellipsis, } -/// A single binding in a let block. -#[derive(Clone, Debug)] -pub struct LetBinding { - pub pattern: Pattern, - pub annot: Option, - pub value: RichTerm, -} - /// An infix operator that is not applied. Used for the curried operator syntax (e.g `(==)`) -pub enum InfixOp { - Unary(UnaryOp), - Binary(BinaryOp), -} - -impl From for InfixOp { - fn from(op: UnaryOp) -> Self { - InfixOp::Unary(op) - } -} - -impl From for InfixOp { - fn from(op: BinaryOp) -> Self { - InfixOp::Binary(op) - } -} +pub struct InfixOp(primop::PrimOp); impl InfixOp { /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2 /// => x1 == x2`. Propagate the given position to the function body, for better error /// reporting. - pub fn eta_expand(self, pos: TermPos) -> RichTerm { - let pos = pos.into_inherited(); + pub fn eta_expand(self, alloc: &AstAlloc) -> Node<'_> { match self { // We treat `UnaryOp::BoolAnd` and `UnaryOp::BoolOr` separately. - // They should morally be binary operators, but we represent them as unary - // operators internally so that their second argument is evaluated lazily. 
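As a reminder of what the elaboration above computes, here is a self-contained miniature of its right fold (it builds strings instead of `Field`/`Ast` values, purely for illustration):

```rust
fn nest(path: &[&str], value: &str) -> String {
    // The head of the path becomes the single returned field name...
    let (head, rest) = path.split_first().expect("path must be non-empty");
    // ...and the remaining segments are folded from the right into nested
    // single-field records, exactly like `elaborate` above.
    let inner = rest
        .iter()
        .rev()
        .fold(value.to_string(), |acc, seg| format!("{{ {seg} = {acc} }}"));
    format!("{head} = {inner}")
}

// nest(&["a", "b", "c"], "foo") returns "a = { b = { c = foo } }".
```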
-            InfixOp::Unary(op @ UnaryOp::BoolAnd) | InfixOp::Unary(op @ UnaryOp::BoolOr) => {
-                mk_fun!(
-                    "x1",
-                    "x2",
-                    mk_app!(mk_term::op1(op, mk_term::var("x1")), mk_term::var("x2")).with_pos(pos)
+            //
+            // They are unary operators taking a second lazy argument, but the current mainline
+            // evaluator expects that they are always fully applied (including to their argument).
+            // That is, Nickel currently doesn't support a partial application like `%bool_or%
+            // <exp>` (which is fine, because the latter isn't actually representable in the
+            // source language: `BoolOr` is only expressible through the infix syntax `<exp1> ||
+            // <exp2>`). Thus, instead of eta-expanding to `fun x => <op> x` as we would for other
+            // unary operators, we eta-expand to `fun x1 x2 => <op> x1 x2`.
+            InfixOp(op @ primop::PrimOp::BoolAnd) | InfixOp(op @ primop::PrimOp::BoolOr) => {
+                let fst_arg = LocIdent::fresh();
+                let snd_arg = LocIdent::fresh();
+
+                alloc.nary_fun(
+                    [
+                        pattern::Pattern::any(fst_arg),
+                        pattern::Pattern::any(snd_arg),
+                    ],
+                    alloc
+                        .app(
+                            alloc
+                                .prim_op(op, iter::once(Node::Var(fst_arg).into()))
+                                .into(),
+                            iter::once(Node::Var(snd_arg).into()),
+                        )
+                        .into(),
+                )
+            }
+            InfixOp(op) => {
+                let arg = LocIdent::fresh();
+
+                alloc.fun(
+                    pattern::Pattern::any(arg),
+                    alloc.prim_op(op, iter::once(Node::Var(arg).into())).into(),
                 )
             }
-            InfixOp::Unary(op) => mk_fun!("x", mk_term::op1(op, mk_term::var("x")).with_pos(pos)),
-            InfixOp::Binary(op) => mk_fun!(
-                "x1",
-                "x2",
-                mk_term::op2(op, mk_term::var("x1"), mk_term::var("x2")).with_pos(pos)
-            ),
         }
     }
 }
@@ -275,28 +264,24 @@ impl InfixOp {
 /// Trait for structures representing annotations which can be combined with a term to build
 /// another term, or another structure holding a term, such as a field. `T` is the said target
 /// structure.
-pub trait AttachTerm<T> {
-    fn attach_term(self, rt: RichTerm) -> T;
+pub trait AttachToAst<'ast, T> {
+    fn attach_to_ast(self, alloc: &'ast AstAlloc, ast: Ast<'ast>) -> T;
 }
 
-impl<T: Combine> Combine for Option<T> {
-    fn combine(left: Self, right: Self) -> Self {
+impl<'ast, T: CombineAlloc<'ast>> CombineAlloc<'ast> for Option<T> {
+    fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self {
         match (left, right) {
             (None, None) => None,
             (None, Some(x)) | (Some(x), None) => Some(x),
-            (Some(left), Some(right)) => Some(Combine::combine(left, right)),
+            (Some(left), Some(right)) => Some(CombineAlloc::combine(alloc, left, right)),
         }
     }
 }
 
-impl Combine for FieldMetadata {
+impl<'ast> CombineAlloc<'ast> for FieldMetadata<'ast> {
     /// Combine two field metadata into one. If data that can't be combined (typically, the
     /// documentation or the type annotation) are set by both, the left one's are kept.
-    ///
-    /// Note that no environment management operation such as closurization of contracts takes
-    /// place, because this function is expected to be used on the AST before the evaluation (in
-    /// the parser or during program transformation).
-    fn combine(left: Self, right: Self) -> Self {
+    fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self {
        let priority = match (left.priority, right.priority) {
            // Neutral corresponds to the case where no priority was specified. In that case, the
            // other priority takes precedence.
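For concreteness, a sketch of the term the lazy branch builds for `(&&)`, written with the `fun!`/`app!`/`primop_app!` builders used elsewhere in this patch (assuming an `alloc: &AstAlloc` in scope; the real code goes through `nary_fun` and patterns, so the exact shape is an assumption):

```rust
let fst_arg = LocIdent::fresh();
let snd_arg = LocIdent::fresh();

// `fun x1 x2 => (%bool_and% x1) x2`: the primop receives its strict first
// argument, and the outer application supplies the lazy second one.
let curried = fun!(
    alloc,
    fst_arg,
    snd_arg,
    app!(
        alloc,
        primop_app!(alloc, PrimOp::BoolAnd, builder::var(fst_arg)),
        builder::var(snd_arg),
    ),
);
```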
@@ -308,7 +293,7 @@ impl Combine for FieldMetadata { FieldMetadata { doc: merge_doc(left.doc, right.doc), - annotation: Combine::combine(left.annotation, right.annotation), + annotation: CombineAlloc::combine(alloc, left.annotation, right.annotation), opt: left.opt || right.opt, // The resulting field will be suppressed from serialization if either of the fields to be merged is. not_exported: left.not_exported || right.not_exported, @@ -317,153 +302,97 @@ impl Combine for FieldMetadata { } } -impl AttachTerm for FieldMetadata { - fn attach_term(self, rt: RichTerm) -> Field { +impl<'ast> AttachToAst<'ast, Field<'ast>> for FieldMetadata<'ast> { + fn attach_to_ast(self, _alloc: &'ast AstAlloc, ast: Ast<'ast>) -> Field<'ast> { Field { - value: Some(rt), + value: Some(ast), metadata: self, - pending_contracts: Default::default(), } } } -impl Combine for LetMetadata { - // Combine two let metadata into one. If `doc` is set by both, the left one's documentation - // is kept. - fn combine(left: Self, right: Self) -> Self { +impl<'ast> CombineAlloc<'ast> for LetMetadata<'ast> { + /// Combine two let metadata into one. Same as `FieldMetadata::combine` but restricted to the + /// metadata that can be associated to a let block. + fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { LetMetadata { - doc: left.doc.or(right.doc), - annotation: Combine::combine(left.annotation, right.annotation), + doc: merge_doc(left.doc, right.doc), + annotation: CombineAlloc::combine(alloc, left.annotation, right.annotation), } } } -impl Combine for TypeAnnotation { - /// Combine two type annotations. If both have `types` set, the final type +impl<'ast> CombineAlloc<'ast> for Annotation<'ast> { + /// Combine two annotations. If both have `types` set, the final type /// is the one of the left annotation, while the right one's type is put /// inside the final `contracts`. /// /// Contracts are combined from left to right; the left one's are put first, /// then maybe the right one's type annotation and then the right one's /// contracts. - fn combine(left: Self, right: Self) -> Self { + fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { let (typ, leftover) = match (left.typ, right.typ) { (left_ty @ Some(_), right_ty @ Some(_)) => (left_ty, right_ty), (left_ty, right_ty) => (left_ty.or(right_ty), None), }; - let contracts = left + let contracts: Vec<_> = left .contracts - .into_iter() + .iter() + .cloned() .chain(leftover) - .chain(right.contracts) + .chain(right.contracts.iter().cloned()) .collect(); - TypeAnnotation { typ, contracts } + alloc.annotation(typ, contracts) } } -impl AttachTerm for TypeAnnotation { - fn attach_term(self, rt: RichTerm) -> RichTerm { +impl<'ast> AttachToAst<'ast, Ast<'ast>> for Annotation<'ast> { + fn attach_to_ast(self, alloc: &'ast AstAlloc, ast: Ast<'ast>) -> Ast<'ast> { if self.is_empty() { - return rt; + return ast; } - let pos = rt.pos; - RichTerm::new(Term::Annotated(self, rt), pos) - } -} - -/// Some constructs are introduced with the metadata pipe operator `|`, but aren't metadata per se -/// (ex: `rec force`/`rec default`). Those are collected in this extended annotation and then -/// desugared into standard metadata. -#[derive(Clone, Debug, Default)] -pub struct FieldExtAnnot { - /// Standard metadata. 
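The type/contract shuffling performed by `Annotation::combine` below is easy to get wrong, so here is a self-contained miniature of the rule (plain `Option`/`Vec` stand-ins, not the actual `Annotation` type):

```rust
fn combine_annots<T>(
    (left_ty, left_ctrs): (Option<T>, Vec<T>),
    (right_ty, right_ctrs): (Option<T>, Vec<T>),
) -> (Option<T>, Vec<T>) {
    // When both sides carry a type, the left one stays the type and the
    // right one is demoted to a contract, inserted between the two lists.
    let (ty, leftover) = match (left_ty, right_ty) {
        (Some(l), Some(r)) => (Some(l), Some(r)),
        (l, r) => (l.or(r), None),
    };
    let ctrs = left_ctrs.into_iter().chain(leftover).chain(right_ctrs).collect();
    (ty, ctrs)
}

// combine_annots((Some("Number"), vec![]), (Some("String"), vec!["Positive"]))
//   == (Some("Number"), vec!["String", "Positive"])
```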
-    pub metadata: FieldMetadata,
-    /// Presence of an annotation `push force`
-    pub rec_force: bool,
-    /// Presence of an annotation `push default`
-    pub rec_default: bool,
-}
-
-impl FieldExtAnnot {
-    pub fn new() -> Self {
-        Default::default()
-    }
-}
-
-impl AttachTerm<Field> for FieldExtAnnot {
-    fn attach_term(self, value: RichTerm) -> Field {
-        let value = if self.rec_force || self.rec_default {
-            let rec_prio = if self.rec_force {
-                RecPriority::Top
-            } else {
-                RecPriority::Bottom
-            };
-
-            let pos = value.pos;
-            Some(rec_prio.apply_rec_prio_op(value).with_pos(pos))
-        } else {
-            Some(value)
-        };
-
-        Field {
-            value,
-            metadata: self.metadata,
-            pending_contracts: Default::default(),
-        }
-    }
-}
-
-impl Combine for FieldExtAnnot {
-    fn combine(left: Self, right: Self) -> Self {
-        let metadata = FieldMetadata::combine(left.metadata, right.metadata);
-        let rec_force = left.rec_force || right.rec_force;
-        let rec_default = left.rec_default || right.rec_default;
-
-        FieldExtAnnot {
-            metadata,
-            rec_force,
-            rec_default,
+        let pos = ast.pos;
+        Ast {
+            node: alloc.annotated(self, ast),
+            pos,
         }
     }
 }
 
-impl From<FieldMetadata> for FieldExtAnnot {
-    fn from(metadata: FieldMetadata) -> Self {
-        FieldExtAnnot {
-            metadata,
-            ..Default::default()
-        }
-    }
-}
-
-/// Turn dynamic accesses using literal chunks only into static accesses
-pub fn mk_access(access: RichTerm, root: RichTerm) -> RichTerm {
-    if let Some(label) = access.as_ref().try_str_chunk_as_static_str() {
-        mk_term::op1(
-            UnaryOp::RecordAccess(LocIdent::new_with_pos(label, access.pos)),
-            root,
+/// Takes a record access written as `foo."<access>"`, and either turns it into a static access
+/// whenever possible (when `<access>` is a static string without interpolation), or into a
+/// dynamic `%record/get%` access otherwise.
+pub fn mk_access<'ast>(alloc: &'ast AstAlloc, access: Ast<'ast>, root: Ast<'ast>) -> Node<'ast> {
+    if let Some(label) = access.node.try_str_chunk_as_static_str() {
+        alloc.prim_op(
+            primop::PrimOp::RecordStatAccess(LocIdent::new_with_pos(label, access.pos)),
+            iter::once(root),
         )
     } else {
-        mk_term::op2(BinaryOp::RecordGet, access, root)
+        alloc.prim_op(primop::PrimOp::RecordGet, [access, root])
    }
 }
 
 /// Build a record from a list of field definitions. If a field is defined several times, the
 /// different definitions are merged.
-pub fn build_record<I>(fields: I, attrs: RecordAttrs) -> Term
+pub fn build_record<'ast, I>(alloc: &'ast AstAlloc, fields: I, open: bool) -> Node<'ast>
 where
-    I: IntoIterator<Item = (FieldPathElem, Field)> + Debug,
+    I: IntoIterator<Item = (FieldPathElem<'ast>, Field<'ast>)> + Debug,
 {
+    use indexmap::IndexMap;
+
+    // We keep a hashmap to make it faster to merge fields with the same identifier.
     let mut static_fields = IndexMap::new();
     let mut dynamic_fields = Vec::new();
 
-    fn insert_static_field(
-        static_fields: &mut IndexMap<LocIdent, Field>,
+    fn insert_static_field<'ast>(
+        alloc: &'ast AstAlloc,
+        static_fields: &mut IndexMap<LocIdent, Field<'ast>>,
         id: LocIdent,
-        field: Field,
+        field: Field<'ast>,
     ) {
         match static_fields.entry(id) {
             Entry::Occupied(mut occpd) => {
                 // In the case of repeated field definitions, we need to merge them
                 let prev = occpd.insert(Field::default());
                 // unwrap(): the field's identifier must have a position during parsing.
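Both `mk_access` above and the dynamic-field handling below reduce to the same question: do the string chunks form a static name? A self-contained miniature of that test (the real code calls `try_str_chunk_as_static_str` on the chunk list):

```rust
// A chunk list is a static name iff it is a single literal, i.e. contains
// no interpolated expression. `Ok` stands for a literal chunk here, `Err`
// for an interpolated one.
fn static_name<'s>(chunks: &[Result<&'s str, ()>]) -> Option<&'s str> {
    match chunks {
        [Ok(lit)] => Some(*lit),
        _ => None,
    }
}

// static_name(&[Ok("I%am.static")]) == Some("I%am.static"): special
// characters alone don't make a name dynamic.
// static_name(&[Ok("a"), Err(())]) == None: interpolation does.
```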
-            occpd.insert(merge_fields(id.pos.unwrap(), prev, field));
+            occpd.insert(merge_fields(alloc, id.pos.unwrap(), prev, field));
             }
             Entry::Vacant(vac) => {
                 vac.insert(field);
@@ -480,8 +409,10 @@ where
     }
 
     fields.into_iter().for_each(|field| match field {
-        (FieldPathElem::Ident(id), t) => insert_static_field(&mut static_fields, id, t),
-        (FieldPathElem::Expr(e), t) => {
+        (FieldPathElem::Ident(id), field) => {
+            insert_static_field(alloc, &mut static_fields, id, field)
+        }
+        (FieldPathElem::Expr(e), field) => {
             // Dynamic fields (whose name is defined by an interpolated string) have a different
             // semantics than fields whose name can be determined statically. However, static
             // fields with special characters are also parsed as string chunks:
@@ -490,84 +421,117 @@ where
             // let x = "dynamic" in {"I%am.static" = false, "%{x}" = true}
             // ```
             //
-            // Here, both fields are parsed as `StrChunks`, but the first field is actually a
+            // Here, both fields are parsed as `StringChunks`, but the first field is actually a
             // static one, just with special characters. The following code determines which fields
             // are actually static or not, and inserts them in the right location.
-            let static_access = e.term.as_ref().try_str_chunk_as_static_str();
+            let static_access = e.node.try_str_chunk_as_static_str();
 
             if let Some(static_access) = static_access {
                 insert_static_field(
+                    alloc,
                     &mut static_fields,
                     LocIdent::new_with_pos(static_access, e.pos),
-                    t,
+                    field,
                 )
             } else {
-                dynamic_fields.push((e, t));
+                dynamic_fields.push((e, field));
             }
         }
     });
 
-    Term::RecRecord(
-        RecordData::new(static_fields, attrs, None),
-        dynamic_fields,
-        None,
-    )
+    Node::Record(alloc.record_data(static_fields, dynamic_fields, open))
 }
 
 /// Merge two fields by performing the merge of both their value (dynamically if
 /// necessary, by introducing a merge operator) and their metadata (statically).
 ///
-/// If the values of both fields are static records ([`Term::Record`]s), their
-/// merge is computed statically. This prevents building terms whose depth is
-/// linear in the number of fields if partial definitions are involved. This
-/// manifested in https://github.com/tweag/nickel/issues/1427.
-fn merge_fields(id_span: RawSpan, field1: Field, field2: Field) -> Field {
+/// If the values of both fields are records, their merge is computed statically. This prevents
+/// building terms whose depth is linear in the number of fields if partial definitions are
+/// involved. This manifested in https://github.com/tweag/nickel/issues/1427.
+fn merge_fields<'ast>(
+    alloc: &'ast AstAlloc,
+    id_span: RawSpan,
+    field1: Field<'ast>,
+    field2: Field<'ast>,
+) -> Field<'ast> {
     // FIXME: We're duplicating a lot of the logic in
     // [`eval::merge::merge_fields`] but not quite enough to actually factor
     // it out
-    fn merge_values(id_span: RawSpan, t1: RichTerm, t2: RichTerm) -> RichTerm {
-        let RichTerm {
-            term: t1,
-            pos: pos1,
-        } = t1;
-        let RichTerm {
-            term: t2,
-            pos: pos2,
-        } = t2;
-        match (t1.into_owned(), t2.into_owned()) {
-            (Term::Record(rd1), Term::Record(rd2)) => {
+    fn merge_values<'ast>(
+        alloc: &'ast AstAlloc,
+        id_span: RawSpan,
+        t1: Ast<'ast>,
+        t2: Ast<'ast>,
+    ) -> Ast<'ast> {
+        match (t1.node, t2.node) {
+            // We don't handle the case of records with dynamic fields, as merging statically and
+            // dynamically won't have the same semantics if a dynamic field has the same name as
+            // one of the fields of the other record (merging statically will error out, while
+            // merging dynamically will properly merge their values).
+ // + // This wasn't handled before the move to the new ast (RFC007) either anyway. + (Node::Record(rd1), Node::Record(rd2)) + if rd1.dyn_fields.is_empty() && rd2.dyn_fields.is_empty() => + { + // We collect fields into temporary hashmaps to easily compute the split. + let left_hashed: IndexMap> = rd1 + .stat_fields + .iter() + .map(|(id, field)| (*id, field.clone())) + .collect(); + let right_hashed: IndexMap> = rd2 + .stat_fields + .iter() + .map(|(id, field)| (*id, field.clone())) + .collect(); let split::SplitResult { left, center, right, - } = split::split(rd1.fields, rd2.fields); - let mut fields = IndexMap::with_capacity(left.len() + center.len() + right.len()); + } = split::split(left_hashed, right_hashed); + + let mut fields = Vec::with_capacity(left.len() + center.len() + right.len()); fields.extend(left); fields.extend(right); for (id, (field1, field2)) in center.into_iter() { - fields.insert(id, merge_fields(id_span, field1, field2)); + fields.push((id, merge_fields(alloc, id_span, field1, field2))); + } + + Ast { + node: Node::Record(alloc.record_data( + fields, + std::iter::empty(), + rd1.open || rd2.open, + )), + //[^record-elaboration-position]: we don't really have a good position to put here. In the end, maybe we + //should keep `TermPos` in `Ast` as long as the parser has to do some of the + //desugaring. + pos: TermPos::None, } - Term::Record(RecordData::new( - fields, - RecordAttrs::combine(rd1.attrs, rd2.attrs), - None, - )) - .into() } - (t1, t2) => mk_term::op2( - BinaryOp::Merge(MergeLabel { - span: id_span, - kind: MergeKind::PiecewiseDef, - }), - RichTerm::new(t1, pos1), - RichTerm::new(t2, pos2), - ), + (node1, node2) => Ast { + node: alloc.prim_op( + primop::PrimOp::Merge(MergeKind::Standard), + [ + Ast { + node: node1, + pos: t1.pos, + }, + Ast { + node: node2, + pos: t2.pos, + }, + ], + ), + // cf [^record-elaboration-position] + pos: TermPos::None, + }, } } let (value, priority) = match (field1.value, field2.value) { (Some(t1), Some(t2)) if field1.metadata.priority == field2.metadata.priority => ( - Some(merge_values(id_span, t1, t2)), + Some(merge_values(alloc, id_span, t1, t2)), field1.metadata.priority, ), (Some(t), _) if field1.metadata.priority > field2.metadata.priority => { @@ -582,20 +546,21 @@ fn merge_fields(id_span: RawSpan, field1: Field, field2: Field) -> Field { _ => unreachable!(), }; - // At this stage, pending contracts aren't filled nor meaningful, and should all be empty. - debug_assert!(field1.pending_contracts.is_empty() && field2.pending_contracts.is_empty()); Field { value, // [`FieldMetadata::combine`] produces subtly different behaviour from // the runtime merging code, which is what we need to replicate here metadata: FieldMetadata { doc: merge_doc(field1.metadata.doc, field2.metadata.doc), - annotation: Combine::combine(field1.metadata.annotation, field2.metadata.annotation), + annotation: CombineAlloc::combine( + alloc, + field1.metadata.annotation, + field2.metadata.annotation, + ), opt: field1.metadata.opt && field2.metadata.opt, not_exported: field1.metadata.not_exported || field2.metadata.not_exported, priority, }, - pending_contracts: Vec::new(), } } @@ -630,22 +595,20 @@ pub fn mk_merge_label(src_id: FileId, l: usize, r: usize) -> MergeLabel { } } -/// Generate a `Let` or a `LetPattern` (depending on whether there's a binding -/// with a record pattern) from the parsing of a let definition. 
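+// For example, `let x = 1, x = 2 in x` is rejected here with a duplicate
+// binding error, while shadowing through nested lets
+// (`let x = 1 in let x = 2 in x`) remains allowed.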
-pub fn mk_let(
+/// Checks that there are no duplicate bindings in a let block (when bindings are simple, that
+/// is, they aren't patterns), and builds the corresponding let block node if the check passes.
+pub fn mk_let<'ast>(
+    alloc: &'ast AstAlloc,
     rec: bool,
-    bindings: Vec<LetBinding>,
-    body: RichTerm,
-) -> Result<RichTerm, ParseError> {
-    let all_simple = bindings
-        .iter()
-        .all(|b| matches!(b.pattern.data, PatternData::Any(_)));
-
+    bindings: Vec<LetBinding<'ast>>,
+    body: Ast<'ast>,
+) -> Result<Node<'ast>, ParseError> {
     // Check for duplicate names across the different bindings. We
     // don't check for duplicate names within a single binding because
     // there are backwards-compatibility constraints (e.g., see
     // `RecordPattern::check_dup`).
     let mut seen_bindings: HashSet<LocIdent> = HashSet::new();
+
     for b in &bindings {
         let new_bindings = b.pattern.bindings();
         for (_path, id, _field) in &new_bindings {
@@ -660,47 +623,14 @@ pub fn mk_let(
         seen_bindings.extend(new_bindings.into_iter().map(|(_path, id, _field)| id));
     }
 
-    if all_simple {
-        Ok(mk_term::let_in(
-            rec,
-            bindings.into_iter().map(|mut b| {
-                let PatternData::Any(id) = b.pattern.data else {
-                    // unreachable: we checked for `all_simple`, meaning that
-                    // all bindings are just Any(_).
-                    unreachable!()
-                };
-                if let Some(ann) = b.annot {
-                    b.value = ann.annotation.attach_term(b.value);
-                }
-                (id, b.value)
-            }),
-            body,
-        ))
-    } else {
-        Ok(mk_term::let_pat_in(
-            rec,
-            bindings.into_iter().map(|mut b| {
-                if let Some(ann) = b.annot {
-                    b.value = ann.annotation.attach_term(b.value);
-                }
-                (b.pattern, b.value)
-            }),
-            body,
-        ))
-    }
-}
-
-/// Generate a `Fun` (when the pattern is trivial) or a `FunPattern` from the parsing of a function
-/// definition. This function panics if the definition somehow has neither an `Ident` nor a
-/// non-`Empty` `Destruct` pattern.
-pub fn mk_fun(pat: Pattern, body: RichTerm) -> Term {
-    match pat.data {
-        PatternData::Any(id) => Term::Fun(id, body),
-        _ => Term::FunPattern(pat, body),
-    }
+    Ok(alloc.let_block(bindings, body, rec))
 }
 
-pub fn mk_import_based_on_filename(path: String, _span: RawSpan) -> Result<Term, ParseError> {
+pub fn mk_import_based_on_filename<'ast>(
+    alloc: &'ast AstAlloc,
+    path: String,
+    _span: RawSpan,
+) -> Result<Node<'ast>, ParseError> {
     let path = OsString::from(path);
 
     let format: Option<InputFormat> =
         InputFormat::from_path(std::path::Path::new(path.as_os_str()));
@@ -708,19 +638,21 @@ pub fn mk_import_based_on_filename(path: String, _span: RawSpan) -> Result
 
-pub fn mk_import_explicit(
+pub fn mk_import_explicit<'ast>(
+    alloc: &'ast AstAlloc,
     path: String,
     format: LocIdent,
     span: RawSpan,
-) -> Result<Term, ParseError> {
+) -> Result<Node<'ast>, ParseError> {
     let path = OsString::from(path);
 
     let Some(format) = InputFormat::from_tag(format.label()) else {
         return Err(ParseError::InvalidImportFormat { span });
     };
-    Ok(Term::Import(Import::Path { path, format }))
+
+    Ok(alloc.import(path, format))
 }
 
 /// Determine the minimal level of indentation of a multi-line string.
 ///
@@ -729,21 +661,21 @@ pub fn mk_import_explicit(
 /// indentation level of a line is the number of consecutive whitespace characters, which are
 /// either a space or a tab, counted from the beginning of the line. If a line is empty or consists
 /// only of whitespace characters, it is ignored.
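+// For example, in a multiline string whose lines are indented by 4, 6 and 2
+// spaces, the minimal indentation is 2; a line consisting only of spaces is
+// ignored and doesn't bring it down further.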
-pub fn min_indent(chunks: &[StrChunk]) -> usize { +pub fn min_indent<'ast>(chunks: &[StringChunk>]) -> usize { let mut min: usize = usize::MAX; let mut current = 0; let mut start_line = true; for chunk in chunks.iter() { match chunk { - StrChunk::Expr(_, _) if start_line => { + StringChunk::Expr(_, _) if start_line => { if current < min { min = current; } start_line = false; } - StrChunk::Expr(_, _) => (), - StrChunk::Literal(s) => { + StringChunk::Expr(_, _) => (), + StringChunk::Literal(s) => { for c in s.chars() { match c { ' ' | '\t' if start_line => current += 1, @@ -822,9 +754,9 @@ pub fn min_indent(chunks: &[StrChunk]) -> usize { ///not sth /// end" /// ``` -pub fn strip_indent(mut chunks: Vec>) -> Vec> { +pub fn strip_indent<'ast>(chunks: &mut Vec>>) { if chunks.is_empty() { - return chunks; + return; } let min = min_indent(&chunks); @@ -856,7 +788,7 @@ pub fn strip_indent(mut chunks: Vec>) -> Vec { + StringChunk::Literal(ref mut s) => { let mut buffer = String::new(); for c in s.chars() { match c { @@ -907,7 +839,7 @@ pub fn strip_indent(mut chunks: Vec>) -> Vec { + StringChunk::Expr(_, ref mut indent) => { if start_line { debug_assert!(current >= min); debug_assert!(expr_on_line.is_none()); @@ -923,12 +855,12 @@ pub fn strip_indent(mut chunks: Vec>) -> Vec *indent = 0, - _ => panic!(), + Some(StringChunk::Expr(_, ref mut indent)) => *indent = 0, + _ => unreachable!( + "all elements in `unindent` should be expressions, but found a literal" + ), } } - - chunks } #[cfg(test)] From 8483d742c6fa6bf0dbd08e111cb395cf88147191 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 18 Nov 2024 19:08:48 +0100 Subject: [PATCH 02/23] Fix almost all grammar errors, fix parser/mod.rs --- core/src/bytecode/ast/builder.rs | 11 +- core/src/bytecode/ast/compat.rs | 20 +- core/src/bytecode/ast/mod.rs | 7 - core/src/bytecode/ast/pattern/mod.rs | 2 +- core/src/bytecode/ast/typ.rs | 13 ++ core/src/cache.rs | 6 +- core/src/error/mod.rs | 4 +- core/src/eval/tests.rs | 4 +- core/src/parser/grammar.lalrpop | 260 +++++++++++------------ core/src/parser/mod.rs | 135 ++++++++++-- core/src/parser/tests.rs | 4 +- core/src/parser/uniterm.rs | 43 ++-- core/src/parser/utils.rs | 2 +- core/src/pretty.rs | 8 +- core/src/program.rs | 10 +- core/src/repl/mod.rs | 14 +- core/src/typ.rs | 4 +- core/tests/integration/typecheck_fail.rs | 2 +- lsp/nls/src/analysis.rs | 4 +- lsp/nls/src/position.rs | 4 +- utils/src/test_program.rs | 8 +- 21 files changed, 337 insertions(+), 228 deletions(-) diff --git a/core/src/bytecode/ast/builder.rs b/core/src/bytecode/ast/builder.rs index 4bc1d928c4..521796906f 100644 --- a/core/src/bytecode/ast/builder.rs +++ b/core/src/bytecode/ast/builder.rs @@ -338,11 +338,18 @@ macro_rules! primop_app { macro_rules! fun { ( $alloc: expr, $id:expr, $body:expr $(,)?) => { $crate::bytecode::ast::Ast::from( - $alloc.fun($crate::identifier::LocIdent::from($id), $crate::bytecode::ast::Ast::from($body)) + $alloc.fun( + $crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($id)), + $crate::bytecode::ast::Ast::from($body) + ) ) }; ( $alloc:expr, $id1:expr, $id2:expr $(, $rest:expr )+ $(,)?) 
=> { - fun!($alloc, $crate::identifier::LocIdent::from($id1), fun!($alloc, $id2, $( $rest ),+)) + fun!( + $alloc, + $id1, + fun!($alloc, $id2, $( $rest ),+) + ) }; } diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index a5d34d3961..a1dd531bce 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -270,17 +270,14 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> { Term::Bool(b) => Node::Bool(*b), Term::Num(n) => alloc.number(n.clone()), Term::Str(s) => alloc.string(s), - Term::StrChunks(chunks) => alloc.string_chunks( - chunks - .iter() - .map(|chunk| match chunk { - term::StrChunk::Literal(s) => StringChunk::Literal(s.clone()), - term::StrChunk::Expr(expr, indent) => { - StringChunk::Expr(expr.to_ast(alloc), *indent) - } - }) - .rev(), - ), + Term::StrChunks(chunks) => { + alloc.string_chunks(chunks.iter().rev().map(|chunk| match chunk { + term::StrChunk::Literal(s) => StringChunk::Literal(s.clone()), + term::StrChunk::Expr(expr, indent) => { + StringChunk::Expr(expr.to_ast(alloc), *indent) + } + })) + } Term::Fun(id, body) => alloc.fun(Pattern::any(*id), body.to_ast(alloc)), Term::FunPattern(pat, body) => alloc.fun(pat.to_ast(alloc), body.to_ast(alloc)), Term::Let(bindings, body, attrs) => alloc.let_block( @@ -1203,6 +1200,7 @@ impl<'ast> FromAst> for term::Term { Node::StringChunks(chunks) => { let chunks = chunks .iter() + .rev() .map(|chunk| match chunk { StringChunk::Literal(s) => term::StrChunk::Literal(s.clone()), StringChunk::Expr(expr, indent) => { diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs index 8647ff9ae6..3d00694861 100644 --- a/core/src/bytecode/ast/mod.rs +++ b/core/src/bytecode/ast/mod.rs @@ -661,13 +661,6 @@ impl AstAlloc { }) } - pub fn constant_pattern<'ast>( - &'ast self, - cst_pat: ConstantPattern<'ast>, - ) -> &'ast ConstantPattern<'ast> { - self.generic_arena.alloc(cst_pat) - } - pub fn or_pattern<'ast, I>(&'ast self, patterns: I, pos: TermPos) -> &'ast OrPattern<'ast> where I: IntoIterator>, diff --git a/core/src/bytecode/ast/pattern/mod.rs b/core/src/bytecode/ast/pattern/mod.rs index abd23adbc7..032f390d44 100644 --- a/core/src/bytecode/ast/pattern/mod.rs +++ b/core/src/bytecode/ast/pattern/mod.rs @@ -79,7 +79,7 @@ pub struct FieldPattern<'ast> { /// - In `{foo={}, bar, ..}`, the last match is a non-capturing ellipsis. /// - In `{foo={}, bar, ..rest}`, the last match is a capturing ellipsis. #[derive(Debug, PartialEq, Clone)] -pub enum LastPattern<'ast, P> { +pub enum PatternTail<'ast, P> { /// The last field is a normal match. In this case the pattern is "closed" so every record /// fields should be matched. Normal(&'ast P), diff --git a/core/src/bytecode/ast/typ.rs b/core/src/bytecode/ast/typ.rs index 2078f3d283..807064cdc5 100644 --- a/core/src/bytecode/ast/typ.rs +++ b/core/src/bytecode/ast/typ.rs @@ -40,3 +40,16 @@ impl<'ast> From> for Type<'ast> { } } } + +impl<'ast> Type<'ast> { + /// Sets a new position for this type. 
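+    ///
+    /// For example, `ty.with_pos(TermPos::None)` returns the same type with
+    /// its position erased.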
+ pub fn with_pos(self, pos: TermPos) -> Type<'ast> { + Type { pos, ..self } + } +} + +impl<'ast> TypeUnr<'ast> { + pub fn spanned(self, pos: TermPos) -> Type<'ast> { + Type { typ: self, pos } + } +} diff --git a/core/src/cache.rs b/core/src/cache.rs index 1d179b0e68..c67f1d4063 100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -9,7 +9,7 @@ use crate::metrics::measure_runtime; #[cfg(feature = "nix-experimental")] use crate::nix_ffi; use crate::package::PackageMap; -use crate::parser::{lexer::Lexer, ErrorTolerantParser}; +use crate::parser::{lexer::Lexer, ErrorTolerantParserCompat}; use crate::position::TermPos; use crate::program::FieldPath; use crate::stdlib::{self as nickel_stdlib, StdlibModule}; @@ -586,7 +586,7 @@ impl Cache { InputFormat::Nickel => { let (t, parse_errs) = measure_runtime!( "runtime:parse:nickel", - parser::grammar::TermParser::new().parse_tolerant(file_id, Lexer::new(buf))? + parser::grammar::ExprParser::new().parse_tolerant(file_id, Lexer::new(buf))? ); Ok((t, parse_errs)) @@ -1716,7 +1716,7 @@ pub mod resolvers { if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) { let buf = self.files.source(file_id); - let term = parser::grammar::TermParser::new() + let term = parser::grammar::ExprParser::new() .parse_strict(file_id, Lexer::new(buf)) .map_err(|e| ImportError::ParseErrors(e, *pos))?; e.insert(term); diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs index bca92750c9..7d7cacf686 100644 --- a/core/src/error/mod.rs +++ b/core/src/error/mod.rs @@ -1733,7 +1733,7 @@ mod blame_error { /// and calls `ty_path::span`. This new type is guaranteed to have all of its positions set, /// providing a definite `PathSpan`. This is similar to the behavior of [`super::primary_alt`]. pub fn path_span(files: &mut Files, path: &[ty_path::Elem], ty: &Type) -> PathSpan { - use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParserCompat}; ty_path::span(path.iter().peekable(), ty) .or_else(|| { @@ -1741,7 +1741,7 @@ mod blame_error { let file_id = files.add(super::UNKNOWN_SOURCE_NAME, type_pprinted.clone()); let ty_with_pos = FixedTypeParser::new() - .parse_strict(file_id, Lexer::new(&type_pprinted)) + .parse_strict_compat(file_id, Lexer::new(&type_pprinted)) .unwrap(); ty_path::span(path.iter().peekable(), &ty_with_pos) diff --git a/core/src/eval/tests.rs b/core/src/eval/tests.rs index 412ef1ff8f..5cfa29424e 100644 --- a/core/src/eval/tests.rs +++ b/core/src/eval/tests.rs @@ -4,7 +4,7 @@ use crate::cache::resolvers::{DummyResolver, SimpleResolver}; use crate::error::{ImportError, NullReporter}; use crate::files::Files; use crate::label::Label; -use crate::parser::{grammar, lexer, ErrorTolerantParser}; +use crate::parser::{grammar, lexer, ErrorTolerantParserCompat}; use crate::term::make as mk_term; use crate::term::Number; use crate::term::{BinaryOp, StrChunk, UnaryOp}; @@ -29,7 +29,7 @@ fn eval_full_no_import(t: RichTerm) -> Result { fn parse(s: &str) -> Option { let id = Files::new().add("", String::from(s)); - grammar::TermParser::new() + grammar::ExprParser::new() .parse_strict(id, lexer::Lexer::new(s)) .map(RichTerm::without_pos) .map_err(|err| println!("{err:?}")) diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 423715250f..75fc7bb36d 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -101,11 +101,11 @@ WithPos: Rule = => t.with_pos(mk_pos(src_id, left, right)); AsExpr: 
Ast<'ast> = > =>? - Ast::try_from(ut) + Ast::try_convert(alloc, ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); AsType: Type<'ast> = > =>? - Type::try_from(ut) + Type::try_convert(alloc, ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); // Repeat a rule zero times or more with a separator interspersed, such that the last @@ -123,7 +123,7 @@ AsUniTerm: UniTerm<'ast> = > => UniTerm::from(ut); AnnotSeries: AnnotAtom = => { <> .into_iter() - .fold(Default::default(), |acc, next| CombineAlloc::combine(alloc, acc, next)); + .fold(Default::default(), |acc, next| CombineAlloc::combine(alloc, acc, next)) }; // A single type or contract annotation. The `Type` rule forbids the use of @@ -138,12 +138,12 @@ AnnotSeries: AnnotAtom = => { // or later (bare `Type`). Almost all rules are of the former kind, and use // `FixedType` (see `FixedType` and `parser::utils::fix_type_vars`). AnnotAtom: Annotation<'ast> = { - "|" => Annotation { - contracts: alloc.types(iter::once(ty)), + "|" => Annotation { + contracts: alloc.types(iter::once(<>)), ..Default::default() }, - ":" => Annotation { - typ: ty, + ":" => Annotation { + typ: Some(<>), ..Default::default() }, }; @@ -153,9 +153,12 @@ AnnotAtom: Annotation<'ast> = { // contract annotation), let annotations also include documentation (`doc`). As // opposed to record fields, they can't express priority, optionality, etc. LetAnnotAtom: LetMetadata<'ast> = { - > => <>.into(), - "|" "doc" => LetMetadata { - doc: Some(s.into()), + AnnotAtom => LetMetadata { + annotation: <>, + ..Default::default() + }, + "|" "doc" => LetMetadata { + doc: Some(<>.into()), ..Default::default() }, } @@ -237,15 +240,14 @@ Type: Type<'ast> = { // This rule is public and can be used from external modules to parse an input // directly as a type. pub FixedType: Type<'ast> = { - =>? { - ty.fix_type_vars(alloc, mk_span(src_id, l, r))?; - Ok(ty) + =>? { + Ok(ty.fix_type_vars(alloc, mk_span(src_id, l, r))?) } }; -// Either a term or a top-level let-binding (a let-binding without an `in`). -// Used exclusively for the REPL. -pub ExtendedTerm: ExtendedTerm<'ast> = { +// Either an expression or a top-level let-binding (a let-binding without an +// `in`). Used exclusively for the REPL. +pub ExtendedExpr: ExtendedTerm> = { "let" ?> "=" => { if let Some(ann) = ann { exp = ann.annotation.attach_to_ast(alloc, exp); @@ -266,7 +268,7 @@ LetBinding: LetBinding<'ast> = { UniTerm: UniTerm<'ast> = { InfixExpr, AnnotatedInfixExpr, - AsUniTerm, + AsUniTerm>, "let" > @@ -324,7 +326,7 @@ Forall: TypeUnr<'ast> = typ: TypeF::Forall { var, var_kind: VarKind::Type, - body: alloc.type_move(acc), + body: alloc.alloc(acc), }, pos } @@ -336,7 +338,7 @@ Forall: TypeUnr<'ast> = // application is the leftmost part in ` ... `. ApplicativeHead: UniTerm<'ast> = { Atom, - AsUniTerm>, + AsUniTerm>, > => UniTerm::from(primop_app!(alloc, op, t)), > > => UniTerm::from(primop_app!(alloc, op, t1, t2)), @@ -347,8 +349,8 @@ ApplicativeHead: UniTerm<'ast> = { // A n-ary application-like expression (n may be 0, in the sense that this rule // also includes previous levels). Applicative: UniTerm<'ast> = { - > *> => { - let node = match head { + > *> => { + let node = match &head { // A zero-ary application is just the head. _ if args.is_empty() => head.node, // We special case the application of an enum tag here. In principle, an @@ -356,7 +358,7 @@ Applicative: UniTerm<'ast> = { // function application. However, for convenience, we made the syntax // the same. 
So we now have to detect cases like `'Foo {x=1}` and // convert that to a proper enum variant. - Node::EnumVariant { tag, arg: None } if args.len() == 1 => + Ast { node: Node::EnumVariant { tag, arg: None }, pos: _ } if args.len() == 1 => alloc.enum_variant(*tag, args.pop()), _ => alloc.app(head, args), }; @@ -370,7 +372,7 @@ TypeArray: TypeUnr<'ast> = "Array" > => // For some reason, we have to bind the type into a `t` // rather than using the usual `<>` placeholder, otherwise, // it doesn't compile. - TypeF::Array(alloc.type_move(t)); + TypeF::Array(alloc.alloc(t)); // A record operation chain, such as `{foo = data}.bar.baz`. RecordOperationChain: Node<'ast> = { @@ -424,12 +426,12 @@ NumberLiteral: Number = { }; Atom: UniTerm<'ast> = { - "(" > ")", + "(" >> ")", "(" ")", NumberLiteral => UniTerm::from(alloc.number(<>)), "null" => UniTerm::from(Node::Null), Bool => UniTerm::from(Node::Bool(<>)), - AsUniTerm, + AsUniTerm>, Ident => UniTerm::from(UniTermNode::Var(<>)), WithPos => UniTerm::from(UniTermNode::Record(<>)), EnumTag => UniTerm::from(Node::EnumVariant { tag: <>, arg: None }), @@ -476,7 +478,7 @@ Error : Ast<'ast> = => { t.error, src_id) ) - .spanned(pos); + .spanned(pos) }; RecordLastField: RecordLastField<'ast> = { @@ -645,11 +647,11 @@ PatternF: Pattern<'ast> = { #[inline] PatternDataF: PatternData<'ast> = { - RecordPattern => PatternData::Record(alloc.record_pattern(<>)), - ArrayPattern => PatternData::Array(alloc.array_pattern(<>)), - ConstantPattern => PatternData::Constant(alloc.constant_pattern(<>)), - EnumRule => PatternData::Enum(alloc.enum_pattern(<>)), - OrRule => PatternData::Or(alloc.or_pattern(<>)), + RecordPattern => PatternData::Record(alloc.alloc(<>)), + ArrayPattern => PatternData::Array(alloc.alloc(<>)), + ConstantPattern => PatternData::Constant(alloc.alloc(<>)), + EnumRule => PatternData::Enum(alloc.alloc(<>)), + OrRule => PatternData::Or(alloc.alloc(<>)), IdentRule => PatternData::Any(<>), "_" => PatternData::Wildcard, }; @@ -707,21 +709,7 @@ ConstantPatternData: ConstantPatternData<'ast> = { }; RecordPattern: RecordPattern<'ast> = { - "{" ",")*> "}" =>? { - let tail = match last { - Some(LastPattern::Normal(m)) => { - field_pats.push(*m); - TailPattern::Empty - }, - Some(LastPattern::Ellipsis(Some(captured))) => { - TailPattern::Capture(captured) - } - Some(LastPattern::Ellipsis(None)) => { - TailPattern::Open - } - None => TailPattern::Empty, - }; - + "{" ",")*> "}" =>? 
{ let pattern = RecordPattern { patterns: alloc.field_patterns(field_pats), tail, @@ -734,21 +722,7 @@ RecordPattern: RecordPattern<'ast> = { }; ArrayPattern: ArrayPattern<'ast> = { - "[" ",")*> "]" => { - let tail = match last { - Some(LastPattern::Normal(m)) => { - patterns.push(*m); - TailPattern::Empty - }, - Some(LastPattern::Ellipsis(Some(captured))) => { - TailPattern::Capture(captured) - } - Some(LastPattern::Ellipsis(None)) => { - TailPattern::Open - } - None => TailPattern::Empty, - }; - + "[" ",")*> "]" => { ArrayPattern { patterns: alloc.patterns(patterns), tail, @@ -861,11 +835,11 @@ OrPatternBranch: Pattern<'ast> = { Pattern { pos, alias: None, - data: PatternData::Enum(EnumPattern { + data: PatternData::Enum(alloc.enum_pattern(EnumPattern { tag: <>.tag, pattern: None, pos, - }), + })), } }, }; @@ -876,7 +850,10 @@ OrPatternUnparens: OrPattern<'ast> = { > => { - let patterns = + // We need to collect in a vector here because the allocator needs an + // exact sized iterator to know beforehand how much memory it needs to + // reserve + let patterns : Vec<_> = patterns.into_iter().chain(std::iter::once(last)).collect(); OrPattern { @@ -921,16 +898,18 @@ FieldPattern: FieldPattern<'ast> = { }, }; -// Last field pattern of a record pattern -LastFieldPat: LastPattern<'ast, FieldPattern<'ast>> = { - FieldPattern => LastPattern::Normal(alloc.field_pattern(<>)), - ".." => LastPattern::Ellipsis(<>), -}; - -// Last pattern of an array pattern -LastElemPat: LastPattern<'ast, Pattern<'ast>> = { - Pattern => LastPattern::Normal(alloc.pattern(<>)), - ".." => LastPattern::Ellipsis(<>), +// Potential ellipsis at the end of an array or a record pattern. This rule also +// account for the presence of a trailing +TailPattern: TailPattern = { + "," ".." => { + if let Some(captured) = <> { + TailPattern::Capture(captured) + } + else { + TailPattern::Open + } + }, + ","? => TailPattern::Empty, } // A default annotation in a pattern. @@ -994,7 +973,7 @@ StringChunks: Node<'ast> = { "Fatal parser error: a string starting with {start:?} should never be closed by {end:?}" ); - let chunks: Vec>> = fst.into_iter() + let mut chunks: Vec>> = fst.into_iter() .map(StringChunk::Literal) .chain(chunks.into_iter() .map(|(mut es, s)| { @@ -1005,38 +984,38 @@ StringChunks: Node<'ast> = { .chain(lasts.into_iter()) .collect(); - let chunks = if start.needs_strip_indent() { - strip_indent(chunks) - } else { - chunks - }; + if start.needs_strip_indent() { + strip_indent(&mut chunks); + } // In the case of symbolic strings, we don't produce a string (in // practice string chunks). The chunks are reified to an Nickel array // and wrapped in a record instead. 
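+        // For example, `foo-s%"hello %{name}"%` (assuming `foo` was declared
+        // as a symbolic string prefix by some library) elaborates to a record
+        // of the shape
+        // `{ tag = 'SymbolicString, prefix = 'foo, fragments = ["hello ", name] }`.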
if let StringStartDelimiter::Symbolic(prefix) = start { let terms = chunks.into_iter().map(|chunk| match chunk { - StringChunk::Literal(_) => alloc.string_chunks(iter::once(chunk)), + StringChunk::Literal(_) => alloc.string_chunks(iter::once(chunk)).into(), StringChunk::Expr(e, _) => e, - }).collect(); + }); - build_record([ - ( - FieldPathElem::Ident("tag".into()), - Field::from(Ast::from(builder::enum_tag("SymbolicString"))) - ), - ( - FieldPathElem::Ident("prefix".into()), - Field::from(Ast::from(builder::enum_tag(prefix))) - ), - ( - FieldPathElem::Ident("fragments".into()), - Field::from(Ast::from(alloc.array(terms))) - ) - ], Default::default()) + build_record( + alloc, + [ + ( + FieldPathElem::Ident("tag".into()), + Field::from(Ast::from(builder::enum_tag("SymbolicString"))) + ), + ( + FieldPathElem::Ident("prefix".into()), + Field::from(Ast::from(builder::enum_tag(prefix))) + ), + ( + FieldPathElem::Ident("fragments".into()), + Field::from(Ast::from(alloc.array(terms))) + ) + ], + Default::default() + ) } else { - let mut chunks = chunks; - chunks.reverse(); alloc.string_chunks(chunks) } }, @@ -1081,10 +1060,17 @@ StandardStaticString = DelimitedStaticString<"\"", "\"">; // A static string using the multiline string syntax. MultilineStaticString: String = DelimitedStaticString<"m%\"","\"%"> => { // strip the common indentation prefix - let chunks: Vec>> = vec![StringChunk::Literal(<>)]; - match strip_indent(chunks).pop().unwrap() { + let mut chunks = vec![StringChunk::Literal(<>)]; + strip_indent(&mut chunks); + + // unwrap(): we crated the vector just above with exactly one element, and + // `strip_indent` doesn't change the size of its vector argument, so there's + // still exactly one element + match chunks.pop().unwrap() { StringChunk::Literal(s) => s, - // We build + // unreachable: we built the only element as a `StringChunk::Literal`, + // and `strip_indent` doesn't change the nature of chunks, so the only + // element can't be anything else (an expression) _ => unreachable!(), } }; @@ -1141,8 +1127,9 @@ UOp: PrimOp = { "string/is_match" => PrimOp::StringIsMatch, "string/find" => PrimOp::StringFind, "string/find_all" => PrimOp::StringFindAll, - "op rec_force" => PrimOp::RecForce, - "op rec_default" => PrimOp::RecDefault, + // Currently recursive priorities are disabled (since 1.2.0). + // "op rec_force" => PrimOp::RecForce, + // "op rec_default" => PrimOp::RecDefault, "record/empty_with_tail" => PrimOp::RecordEmptyWithTail, "trace" => PrimOp::Trace, "label/push_diag" => PrimOp::LabelPushDiag, @@ -1237,14 +1224,14 @@ InfixUOpOrLazyBOp: PrimOp = { } InfixOp: InfixOp = { - => <>.into(), - => <>.into(), + => InfixOp(<>), + => InfixOp(<>), } //TODO[RFC007]: restore proper operation positions CurriedOp: Node<'ast> = { InfixOp => <>.eta_expand(alloc), - "&" => InfixOp::from(PrimOp::Merge(MergeKind::Standard)).eta_expand(alloc), + "&" => InfixOp(PrimOp::Merge(MergeKind::Standard)).eta_expand(alloc), "|>" => { let fst_arg = LocIdent::fresh(); let snd_arg = LocIdent::fresh(); @@ -1258,7 +1245,7 @@ CurriedOp: Node<'ast> = { builder::var(snd_arg), builder::var(fst_arg), ), - ) + ).node }, "!=" => { let fst_arg = LocIdent::fresh(); @@ -1278,7 +1265,7 @@ CurriedOp: Node<'ast> = { builder::var(snd_arg), ), ), - ) + ).node }, //`foo.bar` is a static record access, but when used in a curried form, it's //a dynamic record access (that is, `(.) foo bar` is `foo."%{bar}"`). 
It @@ -1293,18 +1280,18 @@ CurriedOp: Node<'ast> = { alloc, fst_arg, snd_arg, - primop_app!( + primop_app!( alloc, PrimOp::RecordGet, builder::var(snd_arg), builder::var(fst_arg), ) - ) + ).node }, } InfixUOpApp: UniTerm<'ast> = - > => UniTerm::from(alloc.primop(op, e)); + > => UniTerm::from(alloc.prim_op(op, e)); InfixBOpApp: UniTerm<'ast> = > > => @@ -1363,7 +1350,7 @@ InfixExpr: UniTerm<'ast> = { #[precedence(level="11")] #[assoc(side="right")] > "->" > => - UniTerm::from(Type::from(TypeF::Arrow(alloc.type_move(s), alloc.type_move(t)))), + UniTerm::from(Type::from(TypeF::Arrow(alloc.alloc(s), alloc.alloc(t)))), } BOpPre: PrimOp = { @@ -1371,8 +1358,8 @@ BOpPre: PrimOp = { "contract/check" => PrimOp::ContractCheck, "contract/array_lazy_app" => PrimOp::ContractArrayLazyApp, "contract/record_lazy_app" => PrimOp::ContractRecordLazyApp, - "unseal" => PrimOp::Unseal, - "seal" => PrimOp::Seal, + "unseal" => todo!(), + "seal" => todo!(), "label/go_field" => PrimOp::LabelGoField, "record/has_field" => PrimOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), "record/has_field_with_opts" => PrimOp::RecordHasField(RecordOpKind::ConsiderAllFields), @@ -1420,16 +1407,16 @@ NOpPre: UniTerm<'ast> = { } TypeBuiltin: TypeUnr<'ast> = { - "Dyn" => Type::from(TypeF::Dyn), - "Number" => Type::from(TypeF::Number), - "Bool" => Type::from(TypeF::Bool), - "String" => Type::from(TypeF::String), + "Dyn" => TypeF::Dyn, + "Number" => TypeF::Number, + "Bool" => TypeF::Bool, + "String" => TypeF::String, } TypeEnumRow: EnumRow<'ast> = )?> => { EnumRow { id, - typ: typ.map(|ty| alloc.type_move(ty)), + typ: typ.map(|ty| alloc.alloc(ty)), } }; @@ -1442,31 +1429,30 @@ TypeEnum: TypeUnr<'ast> = "[|" ",")*> // order for error reporting. .rev() .fold( - EnumRows( - match tail { - Some(id) => EnumRowsF::TailVar(id), - None => EnumRowsF::Empty, - } - ), + match tail { + Some(id) => EnumRowsF::TailVar(id), + None => EnumRowsF::Empty, + } + , |erows, row| { - EnumRows(EnumRowsF::Extend { + EnumRowsF::Extend { row, - tail: Box::new(erows) - }) + tail: alloc.enum_rows(erows) + } } ); - TypeF::Enum(ty) + EnumRows(TypeF::Enum(ty)) }; TypeAtom: TypeUnr<'ast> = { - , - , - "{" "_" ":" "}" => { - Type::from(TypeF::Dict { - type_fields: Box::new(t), + TypeBuiltin, + TypeEnum, + "{" "_" ":" "}" => { + TypeF::Dict { + type_fields: alloc.alloc(<>), flavour: DictTypeFlavour::Type - }) + } }, // Although dictionary contracts aren't really types, we treat them as // types for now - at least syntactically - as they are represented using a @@ -1481,9 +1467,9 @@ TypeAtom: TypeUnr<'ast> = { // right away inside the dictionary contract (before the enclosing `forall` // is fixed) will indeed turn it into a term variable, and raise an unbound // type variable error. 
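+    // For example, in `forall a. { _ | a }`, fixing the type variables of the
+    // dictionary contract alone turns `a` into a term variable, which is then
+    // reported as unbound.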
- "{" "_" "|" "}" => { + "{" "_" "|" "}" => { TypeF::Dict { - type_fields: Box::new(t), + type_fields: alloc.alloc(<>), flavour: DictTypeFlavour::Contract } }, diff --git a/core/src/parser/mod.rs b/core/src/parser/mod.rs index 6ca0d21112..cc45aebcf5 100644 --- a/core/src/parser/mod.rs +++ b/core/src/parser/mod.rs @@ -1,4 +1,8 @@ -use crate::bytecode::ast::{typ::Type, Ast}; +use crate::bytecode::ast::{ + compat::{FromAst, ToMainline}, + typ::Type, + Ast, AstAlloc, +}; use crate::error::{ParseError, ParseErrors}; use crate::files::FileId; use crate::identifier::LocIdent; @@ -15,7 +19,7 @@ use grammar::__ToTriple; pub mod error; pub mod lexer; -pub mod uniterm; +pub(crate) mod uniterm; pub mod utils; #[cfg(test)] @@ -29,9 +33,9 @@ mod tests; /// nickel>foo /// 1 /// ``` -pub enum ExtendedTerm<'ast> { - Expr(Ast<'ast>), - ToplevelLet(LocIdent, Ast<'ast>), +pub enum ExtendedTerm { + Expr(T), + ToplevelLet(LocIdent, T), } // The interface of LALRPOP-generated parsers, for each public rule. This trait is used as a facade @@ -40,9 +44,12 @@ pub enum ExtendedTerm<'ast> { // of this module, if we don't want our implementation to be coupled to LALRPOP details. // // The type of `parse` was just copy-pasted from the generated code of LALRPOP. -trait LalrpopParser { +//TODO: We could avoid having those pesky `'ast` lifetimes at the top-level of every trait using +//generic associated types, but it's not entirely trivial - to investigate. +trait LalrpopParser<'ast, T> { fn parse<'input, 'err, 'wcard, __TOKEN, __TOKENS>( &self, + alloc: &'ast AstAlloc, src_id: FileId, errors: &'err mut Vec< lalrpop_util::ErrorRecovery, self::error::ParseError>, @@ -51,7 +58,7 @@ trait LalrpopParser { __tokens0: __TOKENS, ) -> Result, self::error::ParseError>> where - __TOKEN: __ToTriple<'input, 'err, 'wcard>, + __TOKEN: __ToTriple<'input, 'ast, 'err, 'wcard>, __TOKENS: IntoIterator; } @@ -59,9 +66,10 @@ trait LalrpopParser { /// LALRPOP. macro_rules! generate_lalrpop_parser_impl { ($parser:ty, $output:ty) => { - impl LalrpopParser<$output> for $parser { + impl<'ast> LalrpopParser<'ast, $output> for $parser { fn parse<'input, 'err, 'wcard, __TOKEN, __TOKENS>( &self, + alloc: &'ast AstAlloc, src_id: FileId, errors: &'err mut Vec< lalrpop_util::ErrorRecovery< @@ -77,54 +85,67 @@ macro_rules! generate_lalrpop_parser_impl { lalrpop_util::ParseError, self::error::ParseError>, > where - __TOKEN: __ToTriple<'input, 'err, 'wcard>, + __TOKEN: __ToTriple<'input, 'ast, 'err, 'wcard>, __TOKENS: IntoIterator, { - Self::parse(self, src_id, errors, next_wildcard_id, __tokens0) + Self::parse(self, alloc, src_id, errors, next_wildcard_id, __tokens0) } } }; } -generate_lalrpop_parser_impl!(grammar::ExtendedTermParser, ExtendedTerm); -generate_lalrpop_parser_impl!(grammar::TermParser, RichTerm); -generate_lalrpop_parser_impl!(grammar::FixedTypeParser, Type); +generate_lalrpop_parser_impl!(grammar::ExtendedExprParser, ExtendedTerm>); +generate_lalrpop_parser_impl!(grammar::ExprParser, Ast<'ast>); +generate_lalrpop_parser_impl!(grammar::FixedTypeParser, Type<'ast>); generate_lalrpop_parser_impl!(grammar::StaticFieldPathParser, Vec); generate_lalrpop_parser_impl!( grammar::CliFieldAssignmentParser, - (Vec, RichTerm, RawSpan) + (Vec, Ast<'ast>, RawSpan) ); -/// Generic interface of the various specialized Nickel parsers. +/// General interface of the various specialized Nickel parsers. /// /// `T` is the product of the parser (a term, a type, etc.). 
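+///
+/// With the new AST, parsing threads an arena allocator explicitly. A rough
+/// sketch of the intended usage, given a `file_id` and its `source` text (not
+/// a doctest):
+///
+/// ```text
+/// let alloc = AstAlloc::new();
+/// let ast = grammar::ExprParser::new()
+///     .parse_strict(&alloc, file_id, lexer::Lexer::new(source))?;
+/// ```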
-pub trait ErrorTolerantParser {
+pub trait ErrorTolerantParser<'ast, T> {
     /// Parse a value from a lexer with the given `file_id` in an error-tolerant way. This method
     /// can still fail for non-recoverable errors.
     fn parse_tolerant(
         &self,
+        alloc: &'ast AstAlloc,
         file_id: FileId,
         lexer: lexer::Lexer,
     ) -> Result<(T, ParseErrors), ParseError>;
 
     /// Parse a value from a lexer with the given `file_id`, failing at the first encountered
     /// error.
-    fn parse_strict(&self, file_id: FileId, lexer: lexer::Lexer) -> Result<T, ParseErrors>;
+    fn parse_strict(
+        &self,
+        alloc: &'ast AstAlloc,
+        file_id: FileId,
+        lexer: lexer::Lexer,
+    ) -> Result<T, ParseErrors>;
 }
 
-impl<T, P> ErrorTolerantParser<T> for P
+impl<'ast, T, P> ErrorTolerantParser<'ast, T> for P
 where
-    P: LalrpopParser<T>,
+    P: LalrpopParser<'ast, T>,
 {
     fn parse_tolerant(
         &self,
+        alloc: &'ast AstAlloc,
         file_id: FileId,
         lexer: lexer::Lexer,
     ) -> Result<(T, ParseErrors), ParseError> {
         let mut parse_errors = Vec::new();
         let mut next_wildcard_id = 0;
         let result = self
-            .parse(file_id, &mut parse_errors, &mut next_wildcard_id, lexer)
+            .parse(
+                alloc,
+                file_id,
+                &mut parse_errors,
+                &mut next_wildcard_id,
+                lexer,
+            )
             .map_err(|err| ParseError::from_lalrpop(err, file_id));
 
         let parse_errors = ParseErrors::from_recoverable(parse_errors, file_id);
@@ -134,11 +155,81 @@ where
         }
     }
 
-    fn parse_strict(&self, file_id: FileId, lexer: lexer::Lexer) -> Result<T, ParseErrors> {
-        match self.parse_tolerant(file_id, lexer) {
+    fn parse_strict(
+        &self,
+        alloc: &'ast AstAlloc,
+        file_id: FileId,
+        lexer: lexer::Lexer,
+    ) -> Result<T, ParseErrors> {
+        match self.parse_tolerant(alloc, file_id, lexer) {
             Ok((t, e)) if e.no_errors() => Ok(t),
             Ok((_, e)) => Err(e),
             Err(e) => Err(e.into()),
         }
     }
 }
+
+/// General interface of the various specialized Nickel parsers.
+///
+/// This trait is a compatibility layer version of [ErrorTolerantParser]. It produces data of the
+/// old, mainline types because the current pipeline still depends on them (defined in
+/// [crate::term]). Eventually we'll get rid of it and only use [ErrorTolerantParser], which
+/// produces the new AST instead.
+pub trait ErrorTolerantParserCompat<T> {
+    /// Parse a value from a lexer with the given `file_id` in an error-tolerant way. This method
+    /// can still fail for non-recoverable errors.
+    fn parse_tolerant_compat(
+        &self,
+        file_id: FileId,
+        lexer: lexer::Lexer,
+    ) -> Result<(T, ParseErrors), ParseError>;
+
+    /// Parse a value from a lexer with the given `file_id`, failing at the first encountered
+    /// error.
+    fn parse_strict_compat(&self, file_id: FileId, lexer: lexer::Lexer) -> Result<T, ParseErrors>;
+}
+
+impl<'ast> FromAst<ExtendedTerm<Ast<'ast>>> for ExtendedTerm<RichTerm> {
+    fn from_ast(ast: &ExtendedTerm<Ast<'ast>>) -> Self {
+        match ast {
+            ExtendedTerm::Expr(t) => ExtendedTerm::Expr(t.to_mainline()),
+            ExtendedTerm::ToplevelLet(ident, t) => {
+                ExtendedTerm::ToplevelLet(*ident, t.to_mainline())
+            }
+        }
+    }
+}
+
+// Generate boilerplate impl to produce legacy mainline types from the available parsers.
+macro_rules! generate_compat_impl {
+    ($parser:ty, $output:ty) => {
+        impl ErrorTolerantParserCompat<$output> for $parser {
+            fn parse_tolerant_compat(
+                &self,
+                file_id: FileId,
+                lexer: lexer::Lexer,
+            ) -> Result<($output, ParseErrors), ParseError> {
+                let alloc = AstAlloc::new();
+                self.parse_tolerant(&alloc, file_id, lexer)
+                    .map(|(t, e)| (t.to_mainline(), e))
+            }
+
+            fn parse_strict_compat(
+                &self,
+                file_id: FileId,
+                lexer: lexer::Lexer,
+            ) -> Result<$output, ParseErrors> {
+                let alloc = AstAlloc::new();
+                self.parse_strict(&alloc, file_id, lexer)
+                    .map(|t| t.to_mainline())
+            }
+        }
+    };
+}
+
+generate_compat_impl!(
+    grammar::ExtendedExprParser,
+    ExtendedTerm<RichTerm>
+);
+generate_compat_impl!(grammar::ExprParser, crate::term::RichTerm);
+generate_compat_impl!(grammar::FixedTypeParser, crate::typ::Type);
diff --git a/core/src/parser/tests.rs b/core/src/parser/tests.rs
index 952a77bdeb..37de6322cd 100644
--- a/core/src/parser/tests.rs
+++ b/core/src/parser/tests.rs
@@ -3,7 +3,7 @@ use super::utils::{build_record, FieldPathElem};
 use crate::error::ParseError;
 use crate::files::Files;
 use crate::identifier::LocIdent;
-use crate::parser::{error::ParseError as InternalParseError, ErrorTolerantParser};
+use crate::parser::{error::ParseError as InternalParseError, ErrorTolerantParserCompat};
 use crate::term::Number;
 use crate::term::Term::*;
 use crate::term::{make as mk_term, Term};
@@ -15,7 +15,7 @@ use assert_matches::assert_matches;
 
 fn parse(s: &str) -> Result {
     let id = Files::new().add("", String::from(s));
 
-    super::grammar::TermParser::new()
+    super::grammar::ExprParser::new()
         .parse_strict(id, Lexer::new(s))
         .map_err(|errs| errs.errors.first().unwrap().clone())
 }
diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs
index 7aa8f624f6..613511f0b6 100644
--- a/core/src/parser/uniterm.rs
+++ b/core/src/parser/uniterm.rs
@@ -78,28 +78,34 @@ impl<'ast> UniTerm<'ast> {
     }
 }
 
-trait TryFromUni<'ast, T>
+/// Similar to `TryFrom`, but takes an additional allocator for conversion from and to
+/// [crate::bytecode::ast::Ast], which requires threading an explicit allocator.
+///
+/// We chose a different name than `try_from` for the method - although it has a different
+/// signature from the standard `TryFrom` (two arguments vs one) - to avoid confusing the compiler
+/// which would otherwise have difficulties disambiguating calls like `Ast::try_from`.
+pub(crate) trait TryConvert<'ast, T>
 where
     Self: Sized,
 {
     type Error;
 
-    fn try_from_uni(alloc: &'ast AstAlloc, uni: T) -> Result<Self, Self::Error>;
+    fn try_convert(alloc: &'ast AstAlloc, from: T) -> Result<Self, Self::Error>;
 }
 
 // For nodes such as `Type` or `Record`, the following implementation has to choose between two
 // positions to use: the one of the wrapping `UniTerm`, and the one stored inside the `RichTerm` or
 // the `Type`. This implementation assumes that the most recently set position is the one of
 // `UniTerm`, which is the single source of truth.
-impl<'ast> TryFromUni<'ast, UniTerm<'ast>> for Type<'ast> { +impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Type<'ast> { type Error = ParseError; - fn try_from_uni(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { let pos = ut.pos; let typ = match ut.node { UniTermNode::Var(id) => TypeF::Var(id.ident()), - UniTermNode::Record(r) => Type::try_from_uni(alloc, r)?.typ, + UniTermNode::Record(r) => Type::try_convert(alloc, r)?.typ, UniTermNode::Type(ty) => ty.typ, UniTermNode::Term(ast) => { if matches!( @@ -125,15 +131,15 @@ impl<'ast> TryFromUni<'ast, UniTerm<'ast>> for Type<'ast> { } } -impl<'ast> TryFromUni<'ast, UniTerm<'ast>> for Ast<'ast> { +impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Ast<'ast> { type Error = ParseError; - fn try_from_uni(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { let UniTerm { node, pos } = ut; let node = match node { UniTermNode::Var(id) => Node::Var(id), - UniTermNode::Record(r) => Ast::try_from_uni(alloc, r)?.node, + UniTermNode::Record(r) => Ast::try_convert(alloc, r)?.node, UniTermNode::Type(typ) => { let typ = typ.fix_type_vars(alloc, pos.unwrap())?; @@ -191,6 +197,17 @@ impl<'ast> From> for UniTerm<'ast> { } } +impl<'ast, T, U> TryConvert<'ast, T> for U +where + U: TryFrom, +{ + type Error = U::Error; + + fn try_convert(_: &AstAlloc, from: T) -> Result { + U::try_from(from) + } +} + /// A record in the `UniTerm` syntax. #[derive(Clone)] pub struct UniRecord<'ast> { @@ -463,7 +480,7 @@ impl<'ast> UniRecord<'ast> { } } -impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Ast<'ast> { +impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Ast<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a term. If the `UniRecord` is syntactically a record type or it @@ -477,7 +494,7 @@ impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Ast<'ast> { /// /// We also fix the type variables of the type appearing inside annotations (see in-code /// documentation of the private symbol `FixTypeVars::fix_type_vars`). - fn try_from_uni(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { let pos = ur.pos; // First try to interpret this record as a type. @@ -514,14 +531,14 @@ impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Ast<'ast> { } /// Try to convert a `UniRecord` to a type. The strict part means that the `UniRecord` must be -impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Type<'ast> { +impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Type<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a type. If the `UniRecord` has a tail, it is interpreted strictly /// as a type and fail if it isn't a plain record type. Otherwise, we first try to interpret it /// as a plain record type, and if that doesn't work, we interpret it as a term and wrap it /// back as a user-defined contract. 
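+    ///
+    /// For example, `{ foo : Number }` can be interpreted as a record type,
+    /// while `{ foo : Number, bar = 1 }` has a defined field and can only be
+    /// a record literal, so it is wrapped back as a user-defined contract.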
- fn try_from_uni(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { let pos = ur.pos; if let Some((_, tail_pos)) = ur.tail { @@ -534,7 +551,7 @@ impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Type<'ast> { } else { let pos = ur.pos; ur.clone().into_type_strict(alloc).or_else(|_| { - Ast::try_from_uni(alloc, ur).map(|ast| Type { + Ast::try_convert(alloc, ur).map(|ast| Type { typ: TypeF::Contract(alloc.ast(ast)), pos, }) diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index dfeac36771..9d7dcaefd2 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -213,7 +213,7 @@ pub enum RecordLastField<'ast> { } /// An infix operator that is not applied. Used for the curried operator syntax (e.g `(==)`) -pub struct InfixOp(primop::PrimOp); +pub(super) struct InfixOp(pub(super) primop::PrimOp); impl InfixOp { /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2 diff --git a/core/src/pretty.rs b/core/src/pretty.rs index 7a04b1e3c6..2d357e747f 100644 --- a/core/src/pretty.rs +++ b/core/src/pretty.rs @@ -1383,8 +1383,8 @@ mod tests { use crate::files::Files; use crate::parser::lexer::Lexer; use crate::parser::{ - grammar::{FixedTypeParser, TermParser}, - ErrorTolerantParser, + grammar::{ExprParser, FixedTypeParser}, + ErrorTolerantParserCompat, }; use pretty::Doc; @@ -1396,7 +1396,7 @@ mod tests { let id = Files::new().add("", s); FixedTypeParser::new() - .parse_strict(id, Lexer::new(s)) + .parse_strict_compat(id, Lexer::new(s)) .unwrap() } @@ -1404,7 +1404,7 @@ mod tests { fn parse_term(s: &str) -> RichTerm { let id = Files::new().add("", s); - TermParser::new().parse_strict(id, Lexer::new(s)).unwrap() + ExprParser::new().parse_strict(id, Lexer::new(s)).unwrap() } /// Parse a string representation `long` of a type, and assert that diff --git a/core/src/program.rs b/core/src/program.rs index d59ecf32a2..0eb6be940b 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -67,7 +67,9 @@ impl FieldPath { /// Indeed, there's no such thing as a valid empty field path (at least from the parsing point /// of view): if `input` is empty, or consists only of spaces, `parse` returns a parse error. 
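+    ///
+    /// For example, `"a.b.c"` parses to the three-element path `a`, `b`, `c`;
+    /// a segment can also be quoted (as in `"a.\"b c\""`) when a field name
+    /// isn't a valid bare identifier.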
pub fn parse(cache: &mut Cache, input: String) -> Result { - use crate::parser::{grammar::StaticFieldPathParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{ + grammar::StaticFieldPathParser, lexer::Lexer, ErrorTolerantParserCompat, + }; let input_id = cache.replace_string(SourcePath::Query, input); let s = cache.source(input_id); @@ -140,14 +142,16 @@ impl FieldOverride { assignment: String, priority: MergePriority, ) -> Result { - use crate::parser::{grammar::CliFieldAssignmentParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{ + grammar::CliFieldAssignmentParser, lexer::Lexer, ErrorTolerantParserCompat, + }; let input_id = cache.replace_string(SourcePath::CliFieldAssignment, assignment); let s = cache.source(input_id); let parser = CliFieldAssignmentParser::new(); let (path, _, span_value) = parser - .parse_strict(input_id, Lexer::new(s)) + .parse_strict_compat(input_id, Lexer::new(s)) // We just need to report an error here .map_err(|mut errs| { errs.errors.pop().expect( diff --git a/core/src/repl/mod.rs b/core/src/repl/mod.rs index 3789792d39..8a7a3b5861 100644 --- a/core/src/repl/mod.rs +++ b/core/src/repl/mod.rs @@ -16,7 +16,7 @@ use crate::eval::cache::Cache as EvalCache; use crate::eval::{Closure, VirtualMachine}; use crate::files::FileId; use crate::identifier::LocIdent; -use crate::parser::{grammar, lexer, ErrorTolerantParser, ExtendedTerm}; +use crate::parser::{grammar, lexer, ErrorTolerantParserCompat, ExtendedTerm}; use crate::program::FieldPath; use crate::term::TraverseOrder; use crate::term::{record::Field, RichTerm, Term, Traverse}; @@ -81,7 +81,7 @@ pub trait Repl { /// Standard implementation of the REPL backend. pub struct ReplImpl { /// The parser, supporting toplevel let declaration. - parser: grammar::ExtendedTermParser, + parser: grammar::ExtendedExprParser, /// The current environment (for evaluation and typing). Contain the initial environment with /// the stdlib, plus toplevel declarations and loadings made inside the REPL. env: Envs, @@ -96,7 +96,7 @@ impl ReplImpl { /// Create a new empty REPL. pub fn new(trace: impl Write + 'static) -> Self { ReplImpl { - parser: grammar::ExtendedTermParser::new(), + parser: grammar::ExtendedExprParser::new(), env: Envs::new(), initial_type_ctxt: typecheck::Context::new(), vm: VirtualMachine::new(Cache::new(ErrorTolerance::Strict), trace, NullReporter {}), @@ -190,7 +190,7 @@ impl ReplImpl { let (term, parse_errs) = self .parser - .parse_tolerant(file_id, lexer::Lexer::new(exp))?; + .parse_tolerant_compat(file_id, lexer::Lexer::new(exp))?; if !parse_errs.no_errors() { return Err(parse_errs.into()); @@ -394,7 +394,7 @@ pub enum InputStatus { ) )] pub struct InputParser { - parser: grammar::ExtendedTermParser, + parser: grammar::ExtendedExprParser, /// Currently the parser expect a `FileId` to fill in location information. For this /// validator, this may be a dummy one, since for now location information is not used. 
file_id: FileId, @@ -403,7 +403,7 @@ pub struct InputParser { impl InputParser { pub fn new(file_id: FileId) -> Self { InputParser { - parser: grammar::ExtendedTermParser::new(), + parser: grammar::ExtendedExprParser::new(), file_id, } } @@ -415,7 +415,7 @@ impl InputParser { let result = self .parser - .parse_tolerant(self.file_id, lexer::Lexer::new(input)); + .parse_tolerant_compat(self.file_id, lexer::Lexer::new(input)); let partial = |pe| { matches!( diff --git a/core/src/typ.rs b/core/src/typ.rs index f443f7f841..5b51175c62 100644 --- a/core/src/typ.rs +++ b/core/src/typ.rs @@ -1909,7 +1909,7 @@ impl PrettyPrintCap for Type {} #[cfg(test)] mod tests { use super::*; - use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParserCompat}; /// Parse a type represented as a string. fn parse_type(s: &str) -> Type { @@ -1917,7 +1917,7 @@ mod tests { let id = Files::new().add("", s); FixedTypeParser::new() - .parse_strict(id, Lexer::new(s)) + .parse_strict_compat(id, Lexer::new(s)) .unwrap() } diff --git a/core/tests/integration/typecheck_fail.rs b/core/tests/integration/typecheck_fail.rs index 816673a3e0..f829b47699 100644 --- a/core/tests/integration/typecheck_fail.rs +++ b/core/tests/integration/typecheck_fail.rs @@ -16,7 +16,7 @@ fn type_check_expr(s: impl std::string::ToString) -> Result<(), TypecheckError> let s = s.to_string(); let id = Files::new().add("", s.clone()); type_check( - &grammar::TermParser::new() + &grammar::ExprParser::new() .parse_strict(id, lexer::Lexer::new(&s)) .unwrap(), ) diff --git a/lsp/nls/src/analysis.rs b/lsp/nls/src/analysis.rs index 205c093116..52fa8fe709 100644 --- a/lsp/nls/src/analysis.rs +++ b/lsp/nls/src/analysis.rs @@ -438,7 +438,7 @@ mod tests { use nickel_lang_core::{ files::Files, identifier::Ident, - parser::{grammar, lexer, ErrorTolerantParser as _}, + parser::{grammar, lexer, ErrorTolerantParserCompat as _}, term::Term, }; @@ -479,7 +479,7 @@ mod tests { let s = "{ field. 
}"; let file = Files::new().add("", s.to_owned()); - let (rt, _errors) = grammar::TermParser::new() + let (rt, _errors) = grammar::ExprParser::new() .parse_tolerant(file, lexer::Lexer::new(s)) .unwrap(); diff --git a/lsp/nls/src/position.rs b/lsp/nls/src/position.rs index 09f44d5f23..6abc9e57d5 100644 --- a/lsp/nls/src/position.rs +++ b/lsp/nls/src/position.rs @@ -207,7 +207,7 @@ pub(crate) mod tests { use codespan::ByteIndex; use nickel_lang_core::{ files::{FileId, Files}, - parser::{grammar, lexer, ErrorTolerantParser}, + parser::{grammar, lexer, ErrorTolerantParserCompat}, term::{RichTerm, Term, UnaryOp}, }; @@ -216,7 +216,7 @@ pub(crate) mod tests { pub fn parse(s: &str) -> (FileId, RichTerm) { let id = Files::new().add("", String::from(s)); - let term = grammar::TermParser::new() + let term = grammar::ExprParser::new() .parse_strict(id, lexer::Lexer::new(s)) .unwrap(); (id, term) diff --git a/utils/src/test_program.rs b/utils/src/test_program.rs index 31a1136ba2..f17c23e0f2 100644 --- a/utils/src/test_program.rs +++ b/utils/src/test_program.rs @@ -2,7 +2,7 @@ use nickel_lang_core::{ error::{Error, NullReporter, ParseError}, eval::cache::CacheImpl, files::Files, - parser::{grammar, lexer, ErrorTolerantParser, ExtendedTerm}, + parser::{grammar, lexer, ErrorTolerantParserCompat, ExtendedTerm}, program::Program, term::{RichTerm, Term}, typecheck::TypecheckMode, @@ -35,7 +35,7 @@ pub fn eval_file(f: &str) -> Result { pub fn parse(s: &str) -> Result { let id = Files::new().add("", String::from(s)); - grammar::TermParser::new() + grammar::ExprParser::new() .parse_strict(id, lexer::Lexer::new(s)) .map_err(|errs| errs.errors.first().unwrap().clone()) } @@ -43,8 +43,8 @@ pub fn parse(s: &str) -> Result { pub fn parse_extended(s: &str) -> Result { let id = Files::new().add("", String::from(s)); - grammar::ExtendedTermParser::new() - .parse_strict(id, lexer::Lexer::new(s)) + grammar::ExtendedExprParser::new() + .parse_strict_compat(id, lexer::Lexer::new(s)) .map_err(|errs| errs.errors.first().unwrap().clone()) } From 3a611b1895d0bd5aaabdf452c0d618dc3bfda720 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 19 Nov 2024 14:45:11 +0100 Subject: [PATCH 03/23] Fix last errors to make it compile --- core/src/bytecode/ast/compat.rs | 2 +- core/src/cache.rs | 7 +-- core/src/combine.rs | 20 ++++++++ core/src/parser/grammar.lalrpop | 84 ++++++++++++++++----------------- core/src/parser/mod.rs | 61 ++++++++++++++++++++---- core/src/parser/utils.rs | 10 ---- core/src/program.rs | 2 +- core/src/repl/mod.rs | 12 ++--- core/src/term/mod.rs | 20 ++++++++ core/src/term/record.rs | 22 +++++++++ utils/src/test_program.rs | 8 ++-- 11 files changed, 173 insertions(+), 75 deletions(-) diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index a1dd531bce..ecd8a22dfe 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -131,7 +131,7 @@ impl<'ast> FromMainline<'ast, term::pattern::ConstantPattern> for PatternData<'a term::pattern::ConstantPatternData::Null => ConstantPatternData::Null, }; - PatternData::Constant(alloc.constant_pattern(ConstantPattern { + PatternData::Constant(alloc.alloc(ConstantPattern { data, pos: pattern.pos, })) diff --git a/core/src/cache.rs b/core/src/cache.rs index c67f1d4063..52f872d352 100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -586,7 +586,8 @@ impl Cache { InputFormat::Nickel => { let (t, parse_errs) = measure_runtime!( "runtime:parse:nickel", - parser::grammar::ExprParser::new().parse_tolerant(file_id, 
Lexer::new(buf))? + parser::grammar::TermParser::new() + .parse_tolerant_compat(file_id, Lexer::new(buf))? ); Ok((t, parse_errs)) @@ -1716,8 +1717,8 @@ pub mod resolvers { if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) { let buf = self.files.source(file_id); - let term = parser::grammar::ExprParser::new() - .parse_strict(file_id, Lexer::new(buf)) + let term = parser::grammar::TermParser::new() + .parse_strict_compat(file_id, Lexer::new(buf)) .map_err(|e| ImportError::ParseErrors(e, *pos))?; e.insert(term); Ok(( diff --git a/core/src/combine.rs b/core/src/combine.rs index a47381a821..c99f3217d6 100644 --- a/core/src/combine.rs +++ b/core/src/combine.rs @@ -18,3 +18,23 @@ pub trait Combine: Default { pub trait CombineAlloc<'ast> { fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self; } + +impl Combine for Option { + fn combine(left: Self, right: Self) -> Self { + match (left, right) { + (None, None) => None, + (None, Some(x)) | (Some(x), None) => Some(x), + (Some(left), Some(right)) => Some(Combine::combine(left, right)), + } + } +} + +impl<'ast, T: CombineAlloc<'ast>> CombineAlloc<'ast> for Option { + fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { + match (left, right) { + (None, None) => None, + (None, Some(x)) | (Some(x), None) => Some(x), + (Some(left), Some(right)) => Some(CombineAlloc::combine(alloc, left, right)), + } + } +} diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 75fc7bb36d..e9351cf868 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -31,7 +31,7 @@ //! possibly wrapped as a `UniTerm`). //! //! In consequence, this grammar uses three main types `RichTerm`, `Type` and -//! `UniTerm`, as well as conversion macros `AsExpr`, `AsType` and `AsUniTerm`. +//! `UniTerm`, as well as conversion macros `AsTerm`, `AsType` and `AsUniTerm`. //! //! Rules that are known to only produce `RichTerm` or `Type` may have the //! corresponding more precise return type. Other rules that produce or just @@ -100,7 +100,7 @@ SpannedTy: Type<'ast> = => WithPos: Rule = => t.with_pos(mk_pos(src_id, left, right)); -AsExpr: Ast<'ast> = > =>? +AsTerm: Ast<'ast> = > =>? Ast::try_convert(alloc, ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); @@ -109,7 +109,7 @@ AsType: Type<'ast> = > =>? .map_err(|e| lalrpop_util::ParseError::User{error: e}); // Repeat a rule zero times or more with a separator interspersed, such that the last -// separator is optional: for example, Delimiter will both accept +// separator is optional: for example, Delimiter will both accept // `1,2` and `1,2,`. RepeatSep: Vec = Sep)*> Sep? => { elems.push(last); @@ -226,7 +226,7 @@ FieldAnnot: FieldMetadata<'ast> = AnnotSeries>>; // A general expression. Wrap the root of the grammar as an `Ast`. -pub Expr: Ast<'ast> = AsExpr; +pub Term: Ast<'ast> = AsTerm; // A general type. Chosen such that it can't have top-level annotations. // (see `AnnotAtom`) @@ -247,19 +247,19 @@ pub FixedType: Type<'ast> = { // Either an expression or a top-level let-binding (a let-binding without an // `in`). Used exclusively for the REPL. 
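// For example, the REPL accepts `let foo = 1` with no trailing `in`; it
// parses as a toplevel let declaration rather than as an expression.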
-pub ExtendedExpr: ExtendedTerm> = { - "let" ?> "=" => { +pub ExtendedTerm: ExtendedTerm> = { + "let" ?> "=" => { if let Some(ann) = ann { exp = ann.annotation.attach_to_ast(alloc, exp); } ExtendedTerm::ToplevelLet(id, exp) }, - Expr => ExtendedTerm::Expr(<>), + Term => ExtendedTerm::Term(<>), }; LetBinding: LetBinding<'ast> = { - ?> "=" => { + ?> "=" => { LetBinding { pattern, metadata: metadata.unwrap_or_default(), value } } } @@ -272,7 +272,7 @@ UniTerm: UniTerm<'ast> = { "let" > - "in" =>? { + "in" =>? { Ok(UniTerm::from(mk_let( alloc, recursive.is_some(), @@ -280,7 +280,7 @@ UniTerm: UniTerm<'ast> = { body, )?)) }, - "fun" "=>" => { + "fun" "=>" => { let pos = mk_pos(src_id, l, r); let expr = pats @@ -292,7 +292,7 @@ UniTerm: UniTerm<'ast> = { UniTerm::from(expr) }, - "if" "then" "else" => + "if" "then" "else" => UniTerm::from(alloc.if_then_else(cond, e1, e2)), => UniTerm::from(err), "import" =>? { @@ -307,7 +307,7 @@ UniTerm: UniTerm<'ast> = { }; AnnotatedInfixExpr: UniTerm<'ast> = { - > > => { + > > => { UniTerm::from(ann.attach_to_ast(alloc, e)) }, }; @@ -339,17 +339,17 @@ Forall: TypeUnr<'ast> = ApplicativeHead: UniTerm<'ast> = { Atom, AsUniTerm>, - > => UniTerm::from(primop_app!(alloc, op, t)), - > > + > => UniTerm::from(primop_app!(alloc, op, t)), + > > => UniTerm::from(primop_app!(alloc, op, t1, t2)), - NOpPre>, + NOpPre>, "match" "{" > "}" => UniTerm::from(alloc.match_expr(branches)), }; // A n-ary application-like expression (n may be 0, in the sense that this rule // also includes previous levels). Applicative: UniTerm<'ast> = { - > *> => { + > *> => { let node = match &head { // A zero-ary application is just the head. _ if args.is_empty() => head.node, @@ -376,9 +376,9 @@ TypeArray: TypeUnr<'ast> = "Array" > => // A record operation chain, such as `{foo = data}.bar.baz`. RecordOperationChain: Node<'ast> = { - > "." => + > "." => alloc.prim_op(PrimOp::RecordStatAccess(id), iter::once(e)), - > "." > => mk_access(alloc, t_id, e), + > "." > => mk_access(alloc, t_id, e), }; RecordRowTail: RecordRows<'ast> = { @@ -435,7 +435,7 @@ Atom: UniTerm<'ast> = { Ident => UniTerm::from(UniTermNode::Var(<>)), WithPos => UniTerm::from(UniTermNode::Record(<>)), EnumTag => UniTerm::from(Node::EnumVariant { tag: <>, arg: None }), - "[" > "]" => UniTerm::from(alloc.array(<>)), + "[" > "]" => UniTerm::from(alloc.array(<>)), AsUniTerm>, AsUniTerm>, }; @@ -447,7 +447,7 @@ RecordField: FieldDef<'ast> = { ?> - )?> => { FieldDef { path, @@ -532,13 +532,13 @@ pub StaticFieldPath: Vec = =>? // alone can't encode this invariant. // // We could just return a `Node` instead of a `Ast`, as position information is -// already stored in the span. But the rule produces an Ast anyway, so +// already stored in the span. But the rule produces an Ast anyway, so // it's simpler to just return it instead of artificially deconstructing it. // // This rule is currently only used for the CLI and isn't part of the grammar // for normal Nickel source code. pub CliFieldAssignment: (Vec, Ast<'ast>, RawSpan) = - "=" + "=" => (path, value, mk_span(src_id, start, end)); FieldPathElem: FieldPathElem<'ast> = { @@ -709,7 +709,7 @@ ConstantPatternData: ConstantPatternData<'ast> = { }; RecordPattern: RecordPattern<'ast> = { - "{" ",")*> "}" =>? { + "{" ",")*> "}" =>? 
{ let pattern = RecordPattern { patterns: alloc.field_patterns(field_pats), tail, @@ -722,7 +722,7 @@ RecordPattern: RecordPattern<'ast> = { }; ArrayPattern: ArrayPattern<'ast> = { - "[" ",")*> "]" => { + "[" ",")*> "]" => { ArrayPattern { patterns: alloc.patterns(patterns), tail, @@ -854,7 +854,7 @@ OrPatternUnparens: OrPattern<'ast> = { // exact sized iterator to know beforehand how much memory it needs to // reserve let patterns : Vec<_> = - patterns.into_iter().chain(std::iter::once(last)).collect(); + patterns.into_iter().chain(iter::once(last)).collect(); OrPattern { patterns: alloc.patterns(patterns), @@ -913,7 +913,7 @@ TailPattern: TailPattern = { } // A default annotation in a pattern. -DefaultAnnot: Ast<'ast> = "?" ; +DefaultAnnot: Ast<'ast> = "?" ; // A metadata keyword returned as an indent. In some positions, those are // considered valid identifiers. See ExtendedIdent below. @@ -965,9 +965,9 @@ Bool: bool = { // Depending on the opening brace, these either parse as strings, or as // "symbolic strings", which get desugared here to an array of terms. StringChunks: Node<'ast> = { - // The lexer emits a stream of groups of `ChunkExpr` interspersed by one + // The lexer emits a stream of groups of `ChunkTerm` interspersed by one // `ChunkLiteral`: consecutive chunks literals are fused by the lexer. - => { + => { debug_assert!( start.is_closed_by(&end), "Fatal parser error: a string starting with {start:?} should never be closed by {end:?}" @@ -1045,7 +1045,7 @@ ChunkLiteral : String = }; // An interpolated expression in a string: `%{}`. -ChunkExpr: StringChunk> = Interpolation "}" => StringChunk::Expr(<>, 0); +ChunkTerm: StringChunk> = Interpolation "}" => StringChunk::Expr(<>, 0); // The opening sequence of string interpolation. Interpolation = { "%{", "multstr %{" }; @@ -1161,10 +1161,10 @@ UOp: PrimOp = { "number/tan" => PrimOp::NumberTan, } -PatternGuard: Ast<'ast> = "if" => <>; +PatternGuard: Ast<'ast> = "if" => <>; MatchBranch: MatchBranch<'ast> = - "=>" => + "=>" => MatchBranch { pattern, guard, body}; // Infix operators by precedence levels. 
Lowest levels take precedence over @@ -1290,15 +1290,15 @@ CurriedOp: Node<'ast> = { }, } -InfixUOpApp: UniTerm<'ast> = - > => UniTerm::from(alloc.prim_op(op, e)); +InfixUOpApp: UniTerm<'ast> = + > => UniTerm::from(alloc.prim_op(op, iter::once(e))); -InfixBOpApp: UniTerm<'ast> = - > > => +InfixBOpApp: UniTerm<'ast> = + > > => UniTerm::from(primop_app!(alloc, op, e1, e2)); -InfixLazyBOpApp: UniTerm<'ast> = - > > => +InfixLazyBOpApp: UniTerm<'ast> = + > > => UniTerm::from(app!(alloc, primop_app!(alloc, op, e1), e2)); InfixExpr: UniTerm<'ast> = { @@ -1306,7 +1306,7 @@ InfixExpr: UniTerm<'ast> = { Applicative, #[precedence(level="1")] - "-" > => + "-" > => UniTerm::from(primop_app!(alloc, PrimOp::Sub, alloc.number(Number::ZERO), <>)), #[precedence(level="2")] #[assoc(side="left")] @@ -1322,10 +1322,10 @@ InfixExpr: UniTerm<'ast> = { InfixUOpApp, #[precedence(level="6")] #[assoc(side="left")] - > "&" > => + > "&" > => UniTerm::from(primop_app!(alloc, PrimOp::Merge(MergeKind::Standard), t1, t2)), - > "|>" > => + > "|>" > => UniTerm::from(app!(alloc, t2, t1)), #[precedence(level="7")] #[assoc(side="left")] @@ -1333,7 +1333,7 @@ InfixExpr: UniTerm<'ast> = { #[precedence(level="8")] #[assoc(side="left")] InfixBOpApp, - > "!=" > => + > "!=" > => UniTerm::from( primop_app!( alloc, @@ -1442,7 +1442,7 @@ TypeEnum: TypeUnr<'ast> = "[|" ",")*> } ); - EnumRows(TypeF::Enum(ty)) + TypeF::Enum(EnumRows(ty)) }; TypeAtom: TypeUnr<'ast> = { diff --git a/core/src/parser/mod.rs b/core/src/parser/mod.rs index cc45aebcf5..0b05160520 100644 --- a/core/src/parser/mod.rs +++ b/core/src/parser/mod.rs @@ -25,16 +25,18 @@ pub mod utils; #[cfg(test)] mod tests; -/// Either a term or a toplevel let declaration. +/// Either an expression or a toplevel let declaration. +/// /// Used exclusively in the REPL to allow the defining of variables without having to specify `in`. -/// For instance: +/// For example: +/// /// ```text /// nickel>let foo = 1 /// nickel>foo /// 1 /// ``` pub enum ExtendedTerm { - Expr(T), + Term(T), ToplevelLet(LocIdent, T), } @@ -94,8 +96,8 @@ macro_rules! generate_lalrpop_parser_impl { }; } -generate_lalrpop_parser_impl!(grammar::ExtendedExprParser, ExtendedTerm>); -generate_lalrpop_parser_impl!(grammar::ExprParser, Ast<'ast>); +generate_lalrpop_parser_impl!(grammar::ExtendedTermParser, ExtendedTerm>); +generate_lalrpop_parser_impl!(grammar::TermParser, Ast<'ast>); generate_lalrpop_parser_impl!(grammar::FixedTypeParser, Type<'ast>); generate_lalrpop_parser_impl!(grammar::StaticFieldPathParser, Vec); generate_lalrpop_parser_impl!( @@ -192,7 +194,7 @@ pub trait ErrorTolerantParserCompat { impl<'ast> FromAst>> for ExtendedTerm { fn from_ast(ast: &ExtendedTerm>) -> Self { match ast { - ExtendedTerm::Expr(t) => ExtendedTerm::Expr(t.to_mainline()), + ExtendedTerm::Term(t) => ExtendedTerm::Term(t.to_mainline()), ExtendedTerm::ToplevelLet(ident, t) => { ExtendedTerm::ToplevelLet(*ident, t.to_mainline()) } @@ -228,8 +230,51 @@ macro_rules! 
generate_compat_impl { } generate_compat_impl!( - grammar::ExtendedExprParser, + grammar::ExtendedTermParser, ExtendedTerm ); -generate_compat_impl!(grammar::ExprParser, crate::term::RichTerm); +generate_compat_impl!(grammar::TermParser, crate::term::RichTerm); generate_compat_impl!(grammar::FixedTypeParser, crate::typ::Type); + +// We could have implemented ToMainline +impl<'ast> ErrorTolerantParserCompat<(Vec, crate::term::RichTerm, RawSpan)> + for grammar::CliFieldAssignmentParser +{ + fn parse_tolerant_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<((Vec, crate::term::RichTerm, RawSpan), ParseErrors), ParseError> { + self.parse_tolerant(&AstAlloc::new(), file_id, lexer) + .map(|((path, term, span), e)| ((path, term.to_mainline(), span), e)) + } + + fn parse_strict_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<(Vec, crate::term::RichTerm, RawSpan), ParseErrors> { + self.parse_strict(&AstAlloc::new(), file_id, lexer) + .map(|(path, term, span)| (path, term.to_mainline(), span)) + } +} + +// This implementation doesn't do any conversion, but hide away the (useless, in this case) +// [crate::bytecode::ast::AstAlloc] parameter. +impl ErrorTolerantParserCompat> for grammar::StaticFieldPathParser { + fn parse_tolerant_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<(Vec, ParseErrors), ParseError> { + self.parse_tolerant(&AstAlloc::new(), file_id, lexer) + } + + fn parse_strict_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result, ParseErrors> { + self.parse_strict(&AstAlloc::new(), file_id, lexer) + } +} diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 9d7dcaefd2..b7307f0067 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -268,16 +268,6 @@ pub trait AttachToAst<'ast, T> { fn attach_to_ast(self, alloc: &'ast AstAlloc, ast: Ast<'ast>) -> T; } -impl<'ast, T: CombineAlloc<'ast>> CombineAlloc<'ast> for Option { - fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { - match (left, right) { - (None, None) => None, - (None, Some(x)) | (Some(x), None) => Some(x), - (Some(left), Some(right)) => Some(CombineAlloc::combine(alloc, left, right)), - } - } -} - impl<'ast> CombineAlloc<'ast> for FieldMetadata<'ast> { /// Combine two field metadata into one. If data that can't be combined (typically, the /// documentation or the type annotation) are set by both, the left one's are kept. diff --git a/core/src/program.rs b/core/src/program.rs index 0eb6be940b..f685c92ef9 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -76,7 +76,7 @@ impl FieldPath { let parser = StaticFieldPathParser::new(); let field_path = parser - .parse_strict(input_id, Lexer::new(s)) + .parse_strict_compat(input_id, Lexer::new(s)) // We just need to report an error here .map_err(|mut errs| { errs.errors.pop().expect( diff --git a/core/src/repl/mod.rs b/core/src/repl/mod.rs index 8a7a3b5861..03998d024b 100644 --- a/core/src/repl/mod.rs +++ b/core/src/repl/mod.rs @@ -81,7 +81,7 @@ pub trait Repl { /// Standard implementation of the REPL backend. pub struct ReplImpl { /// The parser, supporting toplevel let declaration. - parser: grammar::ExtendedExprParser, + parser: grammar::ExtendedTermParser, /// The current environment (for evaluation and typing). Contain the initial environment with /// the stdlib, plus toplevel declarations and loadings made inside the REPL. env: Envs, @@ -96,7 +96,7 @@ impl ReplImpl { /// Create a new empty REPL. 
pub fn new(trace: impl Write + 'static) -> Self { ReplImpl { - parser: grammar::ExtendedExprParser::new(), + parser: grammar::ExtendedTermParser::new(), env: Envs::new(), initial_type_ctxt: typecheck::Context::new(), vm: VirtualMachine::new(Cache::new(ErrorTolerance::Strict), trace, NullReporter {}), @@ -197,7 +197,7 @@ impl ReplImpl { } match term { - ExtendedTerm::RichTerm(t) => { + ExtendedTerm::Term(t) => { let t = self.prepare(None, t)?; Ok(eval_function( &mut self.vm, @@ -370,7 +370,7 @@ pub enum InitError { } pub enum InputStatus { - Complete(ExtendedTerm), + Complete(ExtendedTerm), Partial, Command, Failed(ParseErrors), @@ -394,7 +394,7 @@ pub enum InputStatus { ) )] pub struct InputParser { - parser: grammar::ExtendedExprParser, + parser: grammar::ExtendedTermParser, /// Currently the parser expect a `FileId` to fill in location information. For this /// validator, this may be a dummy one, since for now location information is not used. file_id: FileId, @@ -403,7 +403,7 @@ pub struct InputParser { impl InputParser { pub fn new(file_id: FileId) -> Self { InputParser { - parser: grammar::ExtendedExprParser::new(), + parser: grammar::ExtendedTermParser::new(), file_id, } } diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index d8c0c27751..0b153d9c41 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -22,6 +22,7 @@ use string::NickelString; use crate::{ cache::InputFormat, + combine::Combine, error::{EvalError, ParseError}, eval::{cache::CacheIndex, Environment}, files::FileId, @@ -908,6 +909,25 @@ impl TypeAnnotation { } } +impl Combine for TypeAnnotation { + fn combine(left: Self, right: Self) -> Self { + let (typ, leftover) = match (left.typ, right.typ) { + (left_ty @ Some(_), right_ty @ Some(_)) => (left_ty, right_ty), + (left_ty, right_ty) => (left_ty.or(right_ty), None), + }; + + let contracts: Vec<_> = left + .contracts + .iter() + .cloned() + .chain(leftover) + .chain(right.contracts.iter().cloned()) + .collect(); + + TypeAnnotation { typ, contracts } + } +} + impl From for LetMetadata { fn from(annotation: TypeAnnotation) -> Self { LetMetadata { diff --git a/core/src/term/record.rs b/core/src/term/record.rs index c938ca3c08..6b1a9c9d31 100644 --- a/core/src/term/record.rs +++ b/core/src/term/record.rs @@ -129,6 +129,28 @@ impl FieldMetadata { } } +impl Combine for FieldMetadata { + fn combine(left: Self, right: Self) -> Self { + let priority = match (left.priority, right.priority) { + // Neutral corresponds to the case where no priority was specified. In that case, the + // other priority takes precedence. + (MergePriority::Neutral, p) | (p, MergePriority::Neutral) => p, + // Otherwise, we keep the maximum of both priorities, as we would do when merging + // values. + (p1, p2) => std::cmp::max(p1, p2), + }; + + FieldMetadata { + doc: crate::eval::merge::merge_doc(left.doc, right.doc), + annotation: Combine::combine(left.annotation, right.annotation), + opt: left.opt || right.opt, + // The resulting field will be suppressed from serialization if either of the fields to be merged is. 
+            not_exported: left.not_exported || right.not_exported,
+            priority,
+        }
+    }
+}
+
 impl From<TypeAnnotation> for FieldMetadata {
     fn from(annotation: TypeAnnotation) -> Self {
         FieldMetadata {
diff --git a/utils/src/test_program.rs b/utils/src/test_program.rs
index f17c23e0f2..71556cf786 100644
--- a/utils/src/test_program.rs
+++ b/utils/src/test_program.rs
@@ -35,15 +35,15 @@ pub fn eval_file(f: &str) -> Result<RichTerm, Error> {
 pub fn parse(s: &str) -> Result<RichTerm, ParseError> {
     let id = Files::new().add("", String::from(s));
 
-    grammar::ExprParser::new()
-        .parse_strict(id, lexer::Lexer::new(s))
+    grammar::TermParser::new()
+        .parse_strict_compat(id, lexer::Lexer::new(s))
         .map_err(|errs| errs.errors.first().unwrap().clone())
 }
 
-pub fn parse_extended(s: &str) -> Result<ExtendedTerm, ParseError> {
+pub fn parse_extended(s: &str) -> Result<ExtendedTerm<RichTerm>, ParseError> {
     let id = Files::new().add("", String::from(s));
 
-    grammar::ExtendedExprParser::new()
+    grammar::ExtendedTermParser::new()
         .parse_strict_compat(id, lexer::Lexer::new(s))
         .map_err(|errs| errs.errors.first().unwrap().clone())
 }

From 4c58c474291ff626c69cc17d460713a6410dd6cf Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Tue, 19 Nov 2024 15:03:55 +0100
Subject: [PATCH 04/23] Remove bytecode-experimental feature

As we move toward a bytecode compiler and a bytecode virtual machine, we
are replacing the left part of the pipeline with the new AST
representation. The bytecode module was previously gated by an
experimental feature, the idea being that this feature would enable the
whole bytecode compiler pipeline. However, for now, we only have a new
AST representation, and it's being used in the mainline Nickel parser
(and soon, in the typechecker, etc.). Thus we need access to the new AST
representation by default, and it doesn't make much sense to gate it
behind a feature.

We'll reintroduce the feature once we have a prototype compiler and a
bytecode virtual machine, when it will then make sense to use the
feature to toggle between the legacy tree-walking interpreter and the
new bytecode compiler.
---
 core/Cargo.toml                 | 3 +--
 core/src/bytecode/ast/mod.rs    | 6 +++---
 core/src/lib.rs                 | 1 -
 core/src/parser/grammar.lalrpop | 2 +-
 core/src/parser/utils.rs        | 4 ++--
 5 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/core/Cargo.toml b/core/Cargo.toml
index e802fa8d23..39c34617bc 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -28,7 +28,6 @@ doc = ["dep:comrak"]
 format = ["dep:topiary-core", "dep:topiary-queries", "dep:tree-sitter-nickel"]
 metrics = ["dep:metrics"]
 nix-experimental = [ "dep:cxx", "dep:cxx-build", "dep:pkg-config" ]
-bytecode-experimental = ["dep:bumpalo"]
 benchmark-ci = []
 
 [build-dependencies]
@@ -87,7 +86,7 @@ tree-sitter-nickel = { workspace = true, optional = true }
 metrics = { workspace = true, optional = true }
 
 strsim = "0.10.0"
-bumpalo = { workspace = true, optional = true }
+bumpalo = { workspace = true }
 
 [dev-dependencies]
 pretty_assertions.workspace = true
diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs
index 3d00694861..1f070a0896 100644
--- a/core/src/bytecode/ast/mod.rs
+++ b/core/src/bytecode/ast/mod.rs
@@ -1,8 +1,8 @@
-//! The Nickel AST, as ingested by the bytecode compiler.
+//! The Nickel AST, as ingested by the (future) bytecode compiler.
 //!
 //! Since the AST is built once for each Nickel expression and is then compiled away to bytecode,
-//! the number nodes ever allocated should be reasonably bounded by the input program size. Thus,
-//! for performance reasons, we allocate notes using an arena and keep them alive until the end of
+//! the total number of allocated nodes is reasonably bounded by the input program size. Thus, for
+//! performance reasons, we allocate nodes using an arena and keep them alive until the end of
 //! compilation. In return, we get fast allocation and de-allocation, and we can easily reference
 //! other nodes and data structures using native references.
 //!
diff --git a/core/src/lib.rs b/core/src/lib.rs
index dc38f338f6..59031df64a 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -1,4 +1,3 @@
-#[cfg(feature = "bytecode-experimental")]
 pub mod bytecode;
 pub mod cache;
 pub mod closurize;
diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop
index e9351cf868..02296675d9 100644
--- a/core/src/parser/grammar.lalrpop
+++ b/core/src/parser/grammar.lalrpop
@@ -302,7 +302,7 @@ UniTerm: UniTerm<'ast> = {
         Ok(UniTerm::from(mk_import_explicit(alloc, s, t, mk_span(src_id, l, r))?))
     },
     "import" <pkg: Ident> => {
-        UniTerm::from(Term::Import(Import::Package { id: pkg.ident() }))
+        UniTerm::from(alloc.import_package(pkg.ident()))
     }
 };
diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs
index b7307f0067..011141b853 100644
--- a/core/src/parser/utils.rs
+++ b/core/src/parser/utils.rs
@@ -628,7 +628,7 @@ pub fn mk_import_based_on_filename<'ast>(
     // Fall back to InputFormat::Nickel in case of unknown filename extension for backwards compatibility.
     let format = format.unwrap_or_default();
 
-    Ok(alloc.import(path, format))
+    Ok(alloc.import_path(path, format))
 }
 
 pub fn mk_import_explicit<'ast>(
@@ -642,7 +642,7 @@ pub fn mk_import_explicit<'ast>(
         return Err(ParseError::InvalidImportFormat { span });
     };
 
-    Ok(alloc.import(path, format))
+    Ok(alloc.import_path(path, format))
 }
 
 /// Determine the minimal level of indentation of a multi-line string.

From caa860ec5dfa3b06b45555d3d348380be546a084 Mon Sep 17 00:00:00 2001
From: Yann Hamdaoui
Date: Tue, 19 Nov 2024 16:43:21 +0100
Subject: [PATCH 05/23] Fix curried operator handling and make its impl nicer

---
 core/src/parser/grammar.lalrpop |  85 ++++++-----------------
 core/src/parser/utils.rs        | 116 ++++++++++++++++++++++++++------
 2 files changed, 116 insertions(+), 85 deletions(-)

diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop
index 02296675d9..eb9f197fc6 100644
--- a/core/src/parser/grammar.lalrpop
+++ b/core/src/parser/grammar.lalrpop
@@ -1190,6 +1190,10 @@ InfixUOp5: PrimOp = {
     "!" => PrimOp::BoolNot,
 }
 
+InfixBOp6: PrimOp = {
+    "&" => PrimOp::Merge(MergeKind::Standard),
+}
+
 InfixBOp7: PrimOp = {
     "<" => PrimOp::LessThan,
     "<=" => PrimOp::LessOrEq,
@@ -1213,6 +1217,7 @@ InfixBOp: PrimOp = {
     InfixBOp2,
     InfixBOp3,
     InfixBOp4,
+    InfixBOp6,
     InfixBOp7,
     InfixBOp8,
 }
@@ -1224,70 +1229,26 @@ InfixUOpOrLazyBOp: PrimOp = {
 }
 
 InfixOp: InfixOp = {
-    <InfixBOp> => InfixOp(<>),
-    <InfixUOpOrLazyBOp> => InfixOp(<>),
+    InfixBOp => InfixOp(<>),
+    InfixUOpOrLazyBOp => InfixOp(<>),
 }
 
-//TODO[RFC007]: restore proper operation positions
-CurriedOp: Node<'ast> = {
-    InfixOp => <>.eta_expand(alloc),
-    "&" => InfixOp(PrimOp::Merge(MergeKind::Standard)).eta_expand(alloc),
-    "|>" => {
-        let fst_arg = LocIdent::fresh();
-        let snd_arg = LocIdent::fresh();
-
-        fun!(
-            alloc,
-            fst_arg,
-            snd_arg,
-            app!(
-                alloc,
-                builder::var(snd_arg),
-                builder::var(fst_arg),
-            ),
-        ).node
-    },
-    "!=" => {
-        let fst_arg = LocIdent::fresh();
-        let snd_arg = LocIdent::fresh();
-
-        fun!(
-            alloc,
-            fst_arg,
-            snd_arg,
-            primop_app!(
-                alloc,
-                PrimOp::BoolNot,
-                primop_app!(
-                    alloc,
-                    PrimOp::Eq,
-                    builder::var(fst_arg),
-                    builder::var(snd_arg),
-                ),
-            ),
-        ).node
-    },
-    //`foo.bar` is a static record access, but when used in a curried form, it's
-    //a dynamic record access (that is, `(.) foo bar` is `foo."%{bar}"`). It
-    //turns out a dynamic record access takes the record as the last argument,
-    //in the style of the stdlib. If we want `(.) foo bar` to be `foo."%{bar}"`,
-    //we thus have to flip the arguments.
-    "." => {
-        let fst_arg = LocIdent::fresh();
-        let snd_arg = LocIdent::fresh();
-
-        fun!(
-            alloc,
-            fst_arg,
-            snd_arg,
-            primop_app!(
-                alloc,
-                PrimOp::RecordGet,
-                builder::var(snd_arg),
-                builder::var(fst_arg),
-            )
-        ).node
-    },
+EtaExpand<Op>: Node<'ast> = <l: @L> <op: Op> <r: @R> =>
+    op.eta_expand(alloc, mk_pos(src_id, l, r));
+
+// Infix ops that are desugared away but for which we still need to support
+// the curried operator syntax.
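+// For example, the curried pipe `(|>)` eta-expands to `fun x f => f x`, and
+// `(!=)` to `fun x y => !(x == y)`, as implemented by the `EtaExpand` impl
+// for `ExtendedInfixOp` in `parser/utils.rs`.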
+ExtendedInfixOp: ExtendedInfixOp = { + "|>" => ExtendedInfixOp::ReverseApp, + "!=" => ExtendedInfixOp::NotEqual, +} - fun!( - alloc, - fst_arg, - snd_arg, - primop_app!( - alloc, - PrimOp::BoolNot, - primop_app!( - alloc, - PrimOp::Eq, - builder::var(fst_arg), - builder::var(snd_arg), - ), - ), - ).node - }, - //`foo.bar` is a static record access, but when used in a curried form, it's - //a dynamic record access (that is, `(.) foo bar` is `foo."%{bar}"`). It - //turns out a dynamic record access takes the record as the last argument, - //in the style of the stdlib. If we want `(.) foo bar` to be `foo."%{bar}"`, - //we thus have to flip the arguments. - "." => { - let fst_arg = LocIdent::fresh(); - let snd_arg = LocIdent::fresh(); - - fun!( - alloc, - fst_arg, - snd_arg, - primop_app!( - alloc, - PrimOp::RecordGet, - builder::var(snd_arg), - builder::var(fst_arg), - ) - ).node - }, +DotAsInfixOp: InfixOp = "." => InfixOp(PrimOp::RecordGet); + +CurriedOp: Node<'ast> = { + EtaExpand, + EtaExpand, + EtaExpand, } InfixUOpApp: UniTerm<'ast> = @@ -1322,9 +1283,7 @@ InfixExpr: UniTerm<'ast> = { InfixUOpApp, #[precedence(level="6")] #[assoc(side="left")] - > "&" > => - UniTerm::from(primop_app!(alloc, PrimOp::Merge(MergeKind::Standard), t1, t2)), - + InfixBOpApp, > "|>" > => UniTerm::from(app!(alloc, t2, t1)), diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 011141b853..5918c50307 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -21,9 +21,11 @@ use crate::{ combine::CombineAlloc, eval::merge::{merge_doc, split}, files::FileId, + fun, identifier::LocIdent, label::{Label, MergeKind, MergeLabel}, position::{RawSpan, TermPos}, + primop_app, typ::Type, }; @@ -212,14 +214,19 @@ pub enum RecordLastField<'ast> { Ellipsis, } +/// Trait for operators that can be eta-expanded to a function. +pub(super) trait EtaExpand { + /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2 + /// => x1 == x2`. Propagate the position of the curried operator to the generated primop apps + /// for better error reporting. + fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_>; +} + /// An infix operator that is not applied. Used for the curried operator syntax (e.g `(==)`) pub(super) struct InfixOp(pub(super) primop::PrimOp); -impl InfixOp { - /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2 - /// => x1 == x2`. Propagate the given position to the function body, for better error - /// reporting. - pub fn eta_expand(self, alloc: &AstAlloc) -> Node<'_> { +impl EtaExpand for InfixOp { + fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_> { match self { // We treat `UnaryOp::BoolAnd` and `UnaryOp::BoolOr` separately. // @@ -234,28 +241,93 @@ impl InfixOp { let fst_arg = LocIdent::fresh(); let snd_arg = LocIdent::fresh(); - alloc.nary_fun( - [ - pattern::Pattern::any(fst_arg), - pattern::Pattern::any(snd_arg), - ], - alloc - .app( - alloc - .prim_op(op, iter::once(Node::Var(fst_arg).into())) - .into(), - iter::once(Node::Var(snd_arg).into()), - ) - .into(), + fun!( + alloc, + fst_arg, + snd_arg, + app!( + alloc, + primop_app!(alloc, op, builder::var(fst_arg)), + builder::var(snd_arg), + ) + .with_pos(pos), + ) + .node + } + // `RecordGet field record` corresponds to `record."%{field}"`. Using the curried + // version `(.)` has thus reversed argument corresponding to the `RecordGet` primop, so + // we need to flip them. 
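+            // Concretely, the curried `(.)` becomes `fun record field =>
+            // RecordGet field record`, so that `(.) rec field` evaluates like
+            // `rec."%{field}"`.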
+ InfixOp(op @ primop::PrimOp::RecordGet) => { + let fst_arg = LocIdent::fresh(); + let snd_arg = LocIdent::fresh(); + + fun!( + alloc, + fst_arg, + snd_arg, + primop_app!(alloc, op, builder::var(snd_arg), builder::var(fst_arg)) + .with_pos(pos), ) + .node } InfixOp(op) => { - let arg = LocIdent::fresh(); + let vars: Vec<_> = iter::repeat_with(|| LocIdent::fresh()) + .take(op.arity()) + .collect(); + let fun_args: Vec<_> = vars.iter().map(|arg| pattern::Pattern::any(*arg)).collect(); + let args: Vec<_> = vars.into_iter().map(builder::var).collect(); + + alloc.nary_fun(fun_args, alloc.prim_op(op, args).spanned(pos)) + } + } + } +} + +/// Additional infix operators that aren't proper primitive operations in the Nickel AST but are +/// still available in the surface syntax (and desugared at parsing time). They can still be used +/// in a curried form so they need a wrapper and an `EtaExpand` implementation. +pub(super) enum ExtendedInfixOp { + /// The reverse application operation or pipe operator `|>`. + ReverseApp, + /// The inequality operator `!=`. + NotEqual, +} + +impl EtaExpand for ExtendedInfixOp { + fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_> { + match self { + ExtendedInfixOp::ReverseApp => { + let fst_arg = LocIdent::fresh(); + let snd_arg = LocIdent::fresh(); + + fun!( + alloc, + fst_arg, + snd_arg, + app!(alloc, builder::var(snd_arg), builder::var(fst_arg)).with_pos(pos), + ) + .node + } + ExtendedInfixOp::NotEqual => { + let fst_arg = LocIdent::fresh(); + let snd_arg = LocIdent::fresh(); - alloc.fun( - pattern::Pattern::any(arg), - alloc.prim_op(op, iter::once(Node::Var(arg).into())).into(), + fun!( + alloc, + fst_arg, + snd_arg, + primop_app!( + alloc, + primop::PrimOp::BoolNot, + primop_app!( + alloc, + primop::PrimOp::Eq, + builder::var(fst_arg), + builder::var(snd_arg), + ), + ), ) + .node } } } From 01164a341e7e1480b8c4c47339aa6cbbdcd733d6 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 19 Nov 2024 17:52:21 +0100 Subject: [PATCH 06/23] Revert to the previous handling of last fields (might need conflict resolution for RepeatSep1) --- core/src/parser/grammar.lalrpop | 73 +++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index eb9f197fc6..c13d9607d5 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -111,11 +111,19 @@ AsType: Type<'ast> = > =>? // Repeat a rule zero times or more with a separator interspersed, such that the last // separator is optional: for example, Delimiter will both accept // `1,2` and `1,2,`. -RepeatSep: Vec = Sep)*> Sep? => { +RepeatSep: Vec = Sep)*> => { + elems.extend(last); + elems +}; + +// Same as `RepeatSep`, but repeat the rule at least once (one or more), instead of zero or +// more. +RepeatSep1: Vec = )*> Sep? => { elems.push(last); elems }; + AsUniTerm: UniTerm<'ast> = > => UniTerm::from(ut); // Macro repeating a rule producing some form of annotation (that can be @@ -271,7 +279,7 @@ UniTerm: UniTerm<'ast> = { AsUniTerm>, "let" - > + > "in" =>? { Ok(UniTerm::from(mk_let( alloc, @@ -709,7 +717,21 @@ ConstantPatternData: ConstantPatternData<'ast> = { }; RecordPattern: RecordPattern<'ast> = { - "{" ",")*> "}" =>? { + "{" ",")*> "}" =>? 
{ + let tail = match last { + Some(LastPattern::Normal(m)) => { + field_pats.push(m); + TailPattern::Empty + }, + Some(LastPattern::Ellipsis(Some(captured))) => { + TailPattern::Capture(captured) + } + Some(LastPattern::Ellipsis(None)) => { + TailPattern::Open + } + None => TailPattern::Empty, + }; + let pattern = RecordPattern { patterns: alloc.field_patterns(field_pats), tail, @@ -718,11 +740,25 @@ RecordPattern: RecordPattern<'ast> = { pattern.check_dup()?; Ok(pattern) - }, + } }; ArrayPattern: ArrayPattern<'ast> = { - "[" ",")*> "]" => { + "[" ",")*> "]" => { + let tail = match last { + Some(LastPattern::Normal(m)) => { + patterns.push(m); + TailPattern::Empty + }, + Some(LastPattern::Ellipsis(Some(captured))) => { + TailPattern::Capture(captured) + } + Some(LastPattern::Ellipsis(None)) => { + TailPattern::Open + } + None => TailPattern::Empty, + }; + ArrayPattern { patterns: alloc.patterns(patterns), tail, @@ -898,18 +934,21 @@ FieldPattern: FieldPattern<'ast> = { }, }; -// Potential ellipsis at the end of an array or a record pattern. This rule also -// account for the presence of a trailing -TailPattern: TailPattern = { - "," ".." => { - if let Some(captured) = <> { - TailPattern::Capture(captured) - } - else { - TailPattern::Open - } - }, - ","? => TailPattern::Empty, +// Last field pattern of a record pattern. We need this rule (together with +// `LastElemPat`) combining both a field and a potential ellipsis because +// putting the ellipsis in a separate rule AND handling the case of zero fields +// (`{..}`) isn't possible: the fact that the ellipsis will need a "," separator +// before it will depend on the presence of zero or more fields. A stand-alone +// ellipsis rule would have no way to know that. +LastFieldPat: LastPattern> = { + FieldPattern => LastPattern::Normal(<>), + ".." => LastPattern::Ellipsis(<>), +}; + +// Last pattern of an array pattern. See `LastFieldPat`. +LastElemPat: LastPattern> = { + Pattern => LastPattern::Normal(<>), + ".." => LastPattern::Ellipsis(<>), } // A default annotation in a pattern. From 1ee2261c5b58cb8d096f4296d8833eb7d8b972be Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 10:11:06 +0100 Subject: [PATCH 07/23] Fix compilation errors and spurious grammar ambiguity --- core/src/bytecode/ast/pattern/mod.rs | 22 ---------------------- core/src/parser/grammar.lalrpop | 20 +++++++++++--------- core/src/parser/utils.rs | 19 +++++++++++++++++++ 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/core/src/bytecode/ast/pattern/mod.rs b/core/src/bytecode/ast/pattern/mod.rs index 032f390d44..e682c58cf4 100644 --- a/core/src/bytecode/ast/pattern/mod.rs +++ b/core/src/bytecode/ast/pattern/mod.rs @@ -66,28 +66,6 @@ pub struct FieldPattern<'ast> { pub pos: TermPos, } -/// The last match in a data structure pattern. This can either be a normal match, or an ellipsis -/// which can capture the rest of the data structure. The type parameter `P` is the type of the -/// pattern of the data structure: currently, ellipsis matches are only supported for record, but -/// we'll probably support them for arrays as well. -/// -/// This enum is mostly used during parsing. -/// -/// # Example -/// -/// - In `{foo={}, bar}`, the last match is an normal match. -/// - In `{foo={}, bar, ..}`, the last match is a non-capturing ellipsis. -/// - In `{foo={}, bar, ..rest}`, the last match is a capturing ellipsis. -#[derive(Debug, PartialEq, Clone)] -pub enum PatternTail<'ast, P> { - /// The last field is a normal match. 
In this case the pattern is "closed" so every record - /// fields should be matched. - Normal(&'ast P), - /// The pattern is "open" `, ..}`. Optionally you can bind a record containing the remaining - /// fields to an `Identifier` using the syntax `, ..y}`. - Ellipsis(Option), -} - /// A record pattern. #[derive(Debug, PartialEq, Clone)] pub struct RecordPattern<'ast> { diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index c13d9607d5..73f9c49bea 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -116,9 +116,9 @@ RepeatSep: Vec = Sep)*> => { elems }; -// Same as `RepeatSep`, but repeat the rule at least once (one or more), instead of zero or -// more. -RepeatSep1: Vec = )*> Sep? => { +// Same as `RepeatSep`, but repeat the rule at least once (one or more), instead +// of zero or more. +RepeatSep1: Vec = Sep)*> Sep? => { elems.push(last); elems }; @@ -934,12 +934,14 @@ FieldPattern: FieldPattern<'ast> = { }, }; -// Last field pattern of a record pattern. We need this rule (together with -// `LastElemPat`) combining both a field and a potential ellipsis because -// putting the ellipsis in a separate rule AND handling the case of zero fields -// (`{..}`) isn't possible: the fact that the ellipsis will need a "," separator -// before it will depend on the presence of zero or more fields. A stand-alone -// ellipsis rule would have no way to know that. +// Last field pattern of a record pattern. +// +// We need this rule (together with `LastElemPat`) combining both a last field +// or a potential ellipsis because putting the ellipsis in a separate rule AND +// handling the case of zero fields (`{..}`) isn't possible: the fact that the +// ellipsis will need a "," separator before depends on the presence of zero or +// more fields, but a stand-alone ellipsis rule has no way to get this +// information about previous match. LastFieldPat: LastPattern> = { FieldPattern => LastPattern::Normal(<>), ".." => LastPattern::Ellipsis(<>), diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 5918c50307..269b38fa10 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -214,6 +214,25 @@ pub enum RecordLastField<'ast> { Ellipsis, } +/// The last match in a data structure pattern. This can either be a normal match, or an ellipsis +/// which can capture the rest of the data structure. The type parameter `P` is the type of the +/// pattern of the data structure (ellipsis are supported for both array and record patterns). +/// +/// # Example +/// +/// - In `{foo={}, bar}`, the last match is an normal match. +/// - In `{foo={}, bar, ..}`, the last match is a non-capturing ellipsis. +/// - In `{foo={}, bar, ..rest}`, the last match is a capturing ellipsis. +#[derive(Debug, PartialEq, Clone)] +pub enum LastPattern
<P>
{ + /// The last field is a normal match. In this case the pattern is "closed" so every record + /// fields should be matched. + Normal(P), + /// The pattern is "open" `, ..}`. Optionally you can bind a record containing the remaining + /// fields to an `Identifier` using the syntax `, ..y}`. + Ellipsis(Option), +} + /// Trait for operators that can be eta-expanded to a function. pub(super) trait EtaExpand { /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2 From 85fd2b07773f5db029257218a0b3692a2ad0f203 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 10:43:14 +0100 Subject: [PATCH 08/23] Fix unwrapping position panicking --- core/src/bytecode/ast/compat.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index ecd8a22dfe..8fa165fd13 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -1262,23 +1262,18 @@ impl<'ast> FromAst> for term::Term { } } Node::App { head: fun, args } => { - // unwrap(): the position of Ast should always be set (we might move to `RawSpan` - // instead of `TermPos` soon) - let fun_span = fun.pos.unwrap(); + let fun_pos = fun.pos; let rterm = args.iter().fold(fun.to_mainline(), |result, arg| { // This case is a bit annoying: we need to extract the position of the sub // application to satisfy the old AST structure, but this information isn't // available directly. + // // What we do here is to fuse the span of the term being built and the one of // the current argument, which should be a reasonable approximation (if not // exactly the same thing). - // unwrap(): the position of Ast should always be set (we might move to `RawSpan` - // instead of `TermPos` soon) - let span_arg = arg.pos.unwrap(); - let span = fun_span.fuse(span_arg); - - term::RichTerm::new(Term::App(result, arg.to_mainline()), span.into()) + let arg_pos = arg.pos; + term::RichTerm::new(Term::App(result, arg.to_mainline()), fun_pos.fuse(arg_pos)) }); rterm.term.into_owned() From 78871764960ef340269cdb3500197a30e039af74 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 11:17:02 +0100 Subject: [PATCH 09/23] Fill todo!() when parsing seal/unseal --- core/src/parser/grammar.lalrpop | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 73f9c49bea..09b784eb67 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -1358,8 +1358,8 @@ BOpPre: PrimOp = { "contract/check" => PrimOp::ContractCheck, "contract/array_lazy_app" => PrimOp::ContractArrayLazyApp, "contract/record_lazy_app" => PrimOp::ContractRecordLazyApp, - "unseal" => todo!(), - "seal" => todo!(), + "unseal" => PrimOp::Seal, + "seal" => PrimOp::Unseal, "label/go_field" => PrimOp::LabelGoField, "record/has_field" => PrimOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), "record/has_field_with_opts" => PrimOp::RecordHasField(RecordOpKind::ConsiderAllFields), From d9ea8adf96ff58e211ea361839468bd911a0f395 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 11:17:31 +0100 Subject: [PATCH 10/23] Entirely get rid of rec priorities leftovers --- core/stdlib/internals.ncl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/stdlib/internals.ncl b/core/stdlib/internals.ncl index bd74ab5483..9f2e3caf63 100644 --- a/core/stdlib/internals.ncl +++ b/core/stdlib/internals.ncl @@ -410,11 +410,6 @@ # `%contract/custom%`). 
"$naked_to_custom" = fun naked label value => 'Ok (naked label value), - # Recursive priorities operators - - "$rec_force" = fun value => %rec_force% (%force% value), - "$rec_default" = fun value => %rec_default% (%force% value), - # Provide access to std.contract.Equal within the initial environement. Merging # makes use of `std.contract.Equal`, but it can't blindly substitute such an # expression, because `contract` might have been redefined locally. Putting it From 458f1f2a87a76b81c2bcb8ba502d9920bc47c181 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 13:37:44 +0100 Subject: [PATCH 11/23] Fix fix_type_vars for forall binders, improve code doc sporadically --- core/src/parser/uniterm.rs | 66 ++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 613511f0b6..6b0bd48f9b 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -331,7 +331,7 @@ impl<'ast> UniRecord<'ast> { } /// Checks if this record qualifies as a record type. If this function - /// returns true, then `into_type_strict()` must succeed. + /// returns `true`, then [Self::into_type_strict] must succeed. pub fn is_record_type(&self) -> bool { self.fields.iter().all(|field_def| { // Field paths with a depth > 1 are not supported in record types. @@ -357,9 +357,9 @@ impl<'ast> UniRecord<'ast> { }) } - /// A plain record type, uniquely containing fields of the form `fields: - /// Type`. Currently, this doesn't support the field path syntax: - /// `{foo.bar.baz : Type}.into_type_strict()` returns an `Err`. + /// Turns this record into a plain record type, uniquely containing fields of the form `fields: + /// Type`. Currently, this doesn't support the field path syntax: `{foo.bar.baz : + /// Type}.into_type_strict()` returns an `Err`. pub fn into_type_strict( self, alloc: &'ast AstAlloc, @@ -484,7 +484,7 @@ impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Ast<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a term. If the `UniRecord` is syntactically a record type or it - /// has a tail, it is first interpreted as a type and then wrapped in a `Term::Types`. One + /// has a tail, it is first interpreted as a type and then wrapped in a `Term::Type`. One /// exception is the empty record, which behaves the same both as a type and a contract, and /// turning an empty record literal to an opaque function would break everything. /// @@ -717,10 +717,10 @@ where /// # Ownership /// /// [Self::fix_type_vars_env] might need to be called both on owned data and on immutably - /// borrowed (e.g. [`Type`][crate::bytecode::ast::typ::Type] and [`&'ast + /// borrowed data (e.g. [`Type`][crate::bytecode::ast::typ::Type] and [`&'ast /// Type`][crate::bytecode::ast::typ::Type]). We don't want to duplicate the logic of - /// [Self::fix_type_vars_env] for both, as we can't write that is generic enough and properly - /// avoid useless allocations. + /// [Self::fix_type_vars_env] for both, as we can't write one that is generic enough while + /// properly avoiding useless allocations. /// /// The idea of the current API is that even when operating on owned data, `self` is taken by /// reference. If `self` isn't modified by the fix type phase, then `None` is returned and the @@ -731,13 +731,15 @@ where /// Otherwise, the caller can use [the ast allocator `alloc`][crate::bytecode::ast::AstAlloc] /// to move the owned data into the allocator and get an `&'ast` reference out of it. 
The only
    /// cost is that for owned data, we could have reused the original `self` instead of returning
-    /// a new one, but this is a detail: in practice only the top-level call is performed on owned
-    /// data, and the recursive calls are all performed on `&'ast` references. At worse, we waste
-    /// the top-level node, which is stack-allocated anyway.
+    /// a new one, but this is a detail: in practice, only the top-level call of `fix_type_vars` is
+    /// performed on owned data, and the recursive calls are all performed on `&'ast` references.
+    /// At worst, we waste the top-level node, which is stack-allocated anyway.
     ///
-    /// Because allocated AST nodes are immutable and can't be reclaimed until the whole AST is
-    /// finally transformed to either the mainline AST or to (in the future) bytecode, we want to
-    /// avoid reconstructing useless copies of nodes, which is made possible by [FixResult].
+    /// Because AST nodes are allocated in an arena and are immutable, they won't be reclaimed
+    /// until the whole AST is finally transformed to either the mainline AST or (in the future)
+    /// compiled to bytecode. We want to avoid building useless copies of existing nodes, which is
+    /// the reason behind not using a simpler strategy of just always returning a new value that
+    /// might be identical to the old one if no type variable has been fixed.
    fn fix_type_vars_env(
        &self,
        alloc: &'ast AstAlloc,
@@ -769,7 +771,7 @@ impl<'ast, 'a> FixTypeVars<'ast> for Type<'ast> {
            | TypeF::Contract(_)
            // We don't fix type variables inside a dictionary contract. A dictionary contract
            // should not be considered as a static type, but instead work as a contract. In
-            // particular mustn't be allowed to capture type variables from the enclosing type: see
+            // particular we forbid capturing type variables from the enclosing type: see
            // https://github.com/tweag/nickel/issues/1228.
            | TypeF::Dict { flavour: DictTypeFlavour::Contract, ..}
            | TypeF::Wildcard(_) => Ok(None),
@@ -807,14 +809,15 @@
            }
            TypeF::Forall {
                var,
-                var_kind: _,
+                var_kind: ref prev_var_kind,
                body,
            } => {
-                // We span a new VarKindCell and put it in the environment. The recursive calls to
-                // fix_type_vars will fill this cell with the correct kind, which we get afterwards
-                // to set the right value for `var_kind`.
+                // We spawn a new `VarKindCell` and put it in the environment. The recursive calls
+                // to `fix_type_vars` will fill this cell with the correct kind, which we get
+                // afterwards to set the right value for `var_kind`.
                bound_vars.insert(var.ident(), VarKindCell::new());
-                let body = body.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+                let body_fixed = body.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+
                // unwrap(): we just inserted a value for `var` above, and environment can never
                // delete values.
                // take_var_kind(): once we leave the body of this forall, we no longer need
@@ -827,13 +830,22 @@
                    .take_var_kind()
                    .unwrap_or_default();
 
-                Ok(body.map(|body| {
-                    build_fixed(TypeF::Forall {
+                // By default, the parser sets `var_kind` to `Type`. If the `var_kind` turns out to
+                // actually be `Type`, and the body hasn't changed, we can avoid any cloning and
+                // return `Ok(None)`. Otherwise, we have to build a new `TypeF::Forall`. We still
+                // want to defend against callers that wouldn't follow this convention (that
+                // `prev_var_kind` is necessarily `Type` before fixing), so we still check it.
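+                //
+                // For example, fixing `forall a. a -> a` leaves the kind at the
+                // default `Type` and (if the body comes back unchanged) returns
+                // `Ok(None)`, while fixing `forall r. [| 'Foo; r |]` fills the
+                // cell with `EnumRows` and forces a rebuild of the node.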
+ if body_fixed.is_some() || !matches!((&var_kind, &prev_var_kind), (&VarKind::Type, &VarKind::Type)) { + let body = body_fixed.map(|body| alloc.alloc(body)).unwrap_or(body); + + Ok(Some(build_fixed(TypeF::Forall { var, var_kind, - body: alloc.type_move(body), - }) - })) + body, + }))) + } else { + Ok(None) + } } TypeF::Dict { type_fields, @@ -949,8 +961,8 @@ impl<'ast> FixTypeVars<'ast> for EnumRows<'ast> { ) -> Result>, ParseError> { match erows.0 { EnumRowsF::Empty => Ok(None), - // We can't have a contract in tail position, so we don't fix `TailVar`. However, we - // have to set the correct kind for the corresponding forall binder. + // We can't have a contract in tail position, so we don't fix `TailVar` itself. + // However, we have to set the correct kind for the corresponding forall binder. EnumRowsF::TailVar(id) => { if let Some(cell) = bound_vars.get(&id.ident()) { cell.try_set(VarKind::EnumRows { From 053987825fd38e3b2e2f6db4e39f1faf86aab584 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 14:31:35 +0100 Subject: [PATCH 12/23] Fix handling of zero-ary application/variable --- core/src/parser/grammar.lalrpop | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 09b784eb67..a4f9eff2ac 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -357,21 +357,30 @@ ApplicativeHead: UniTerm<'ast> = { // A n-ary application-like expression (n may be 0, in the sense that this rule // also includes previous levels). Applicative: UniTerm<'ast> = { - > *> => { - let node = match &head { - // A zero-ary application is just the head. - _ if args.is_empty() => head.node, + > *> =>? { + // A zero-ary application is just the head. + if args.is_empty() { + Ok(head) + } + else { + // For a general application, we need the head to be a term. We + // don't support general type applications yet - `Array T` is + // special cased as a type constructor. + let head = Ast::try_convert(alloc, head) .map_err(|e| lalrpop_util::ParseError::User{error: e})?; + // We special case the application of an enum tag here. In principle, an // enum variant applied to an argument is of different nature than a // function application. However, for convenience, we made the syntax // the same. So we now have to detect cases like `'Foo {x=1}` and // convert that to a proper enum variant. 
- Ast { node: Node::EnumVariant { tag, arg: None }, pos: _ } if args.len() == 1 => - alloc.enum_variant(*tag, args.pop()), - _ => alloc.app(head, args), - }; - - UniTerm::from(node) + if let (Node::EnumVariant { ref tag, arg: None }, 1) + = (&head.node, args.len()) { + Ok(alloc.enum_variant(*tag, args.pop()).into()) + } + else { + Ok(alloc.app(head, args).into()) + } + } }, }; From 0bfc7272e7884b48ee0c1127757266d605e084cf Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 16:23:52 +0100 Subject: [PATCH 13/23] Fix test code and corner case of new -> mainline conversion --- core/src/bytecode/ast/compat.rs | 25 +++++++++++++++++---- core/src/eval/tests.rs | 4 ++-- core/src/parser/tests.rs | 40 +++++++++++++-------------------- core/src/parser/utils.rs | 9 +++++--- core/src/pretty.rs | 6 +++-- lsp/nls/src/analysis.rs | 4 ++-- lsp/nls/src/position.rs | 4 ++-- 7 files changed, 52 insertions(+), 40 deletions(-) diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index 8fa165fd13..f689f4ed34 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -1220,6 +1220,23 @@ impl<'ast> FromAst> for term::Term { body, rec, } => { + // Mainline term bindings can't have any metadata associated with them. We need to + // rewrite let metadata to be free-standing type and contract annotations instead, + // which is achieved by this helper. + fn with_metadata(metadata: &LetMetadata<'_>, value: &Ast<'_>) -> term::RichTerm { + let value: term::RichTerm = value.to_mainline(); + let pos = value.pos; + + if metadata.annotation.is_empty() { + return value; + } + + term::RichTerm::new( + term::Term::Annotated(metadata.annotation.to_mainline(), value), + pos, + ) + } + // We try to collect all patterns as single identifiers. If this works, we can emit // a simpler / more compact `Let`. 
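            // For example, `let x = 1 in body` can be converted to a plain
            // `Let`, while a destructuring binding like `let {x, y} = r in body`
            // must go through the more general `LetPattern` branch below.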
let try_bindings = bindings @@ -1227,10 +1244,10 @@ impl<'ast> FromAst> for term::Term { .map( |LetBinding { pattern, - metadata: _, + metadata, value, }| match pattern.data { - PatternData::Any(id) => Some((id, value.to_mainline())), + PatternData::Any(id) => Some((id, with_metadata(metadata, value))), _ => None, }, ) @@ -1251,9 +1268,9 @@ impl<'ast> FromAst> for term::Term { |LetBinding { pattern, value, - metadata: _, + metadata, }| { - (pattern.to_mainline(), value.to_mainline()) + (pattern.to_mainline(), with_metadata(metadata, value)) }, ) .collect(); diff --git a/core/src/eval/tests.rs b/core/src/eval/tests.rs index 5cfa29424e..49e0b80ded 100644 --- a/core/src/eval/tests.rs +++ b/core/src/eval/tests.rs @@ -29,8 +29,8 @@ fn eval_full_no_import(t: RichTerm) -> Result { fn parse(s: &str) -> Option { let id = Files::new().add("", String::from(s)); - grammar::ExprParser::new() - .parse_strict(id, lexer::Lexer::new(s)) + grammar::TermParser::new() + .parse_strict_compat(id, lexer::Lexer::new(s)) .map(RichTerm::without_pos) .map_err(|err| println!("{err:?}")) .ok() diff --git a/core/src/parser/tests.rs b/core/src/parser/tests.rs index 37de6322cd..0c7098b54f 100644 --- a/core/src/parser/tests.rs +++ b/core/src/parser/tests.rs @@ -15,8 +15,8 @@ use assert_matches::assert_matches; fn parse(s: &str) -> Result { let id = Files::new().add("", String::from(s)); - super::grammar::ExprParser::new() - .parse_strict(id, Lexer::new(s)) + super::grammar::TermParser::new() + .parse_strict_compat(id, Lexer::new(s)) .map_err(|errs| errs.errors.first().unwrap().clone()) } @@ -38,29 +38,19 @@ fn mk_single_chunk(s: &str) -> RichTerm { } fn mk_symbolic_single_chunk(prefix: &str, s: &str) -> RichTerm { - use crate::term::record::Field; - - build_record( - [ - ( - FieldPathElem::Ident("tag".into()), - Field::from(RichTerm::from(Term::Enum("SymbolicString".into()))), - ), - ( - FieldPathElem::Ident("prefix".into()), - Field::from(RichTerm::from(Term::Enum(prefix.into()))), - ), - ( - FieldPathElem::Ident("fragments".into()), - Field::from(RichTerm::from(Array( - std::iter::once(mk_single_chunk(s)).collect(), - Default::default(), - ))), - ), - ], - Default::default(), - ) - .into() + use crate::term::{make::builder, record::Field}; + + builder::Record::new() + .field("tag") + .value(Term::Enum("SymbolicString".into())) + .field("prefix") + .value(Term::Enum(prefix.into())) + .field("fragments") + .value(Array( + std::iter::once(mk_single_chunk(s)).collect(), + Default::default(), + )) + .into() } #[test] diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 269b38fa10..70c4b4aca3 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -946,9 +946,12 @@ pub fn strip_indent<'ast>(chunks: &mut Vec>>) { #[cfg(test)] mod tests { - use crate::typ::TypeF; - - use super::*; + use crate::{ + combine::Combine, + label::Label, + term::{LabeledType, TypeAnnotation}, + typ::{Type, TypeF}, + }; #[test] fn contract_annotation_order() { diff --git a/core/src/pretty.rs b/core/src/pretty.rs index 2d357e747f..cc0b316801 100644 --- a/core/src/pretty.rs +++ b/core/src/pretty.rs @@ -1383,7 +1383,7 @@ mod tests { use crate::files::Files; use crate::parser::lexer::Lexer; use crate::parser::{ - grammar::{ExprParser, FixedTypeParser}, + grammar::{FixedTypeParser, TermParser}, ErrorTolerantParserCompat, }; use pretty::Doc; @@ -1404,7 +1404,9 @@ mod tests { fn parse_term(s: &str) -> RichTerm { let id = Files::new().add("", s); - ExprParser::new().parse_strict(id, Lexer::new(s)).unwrap() + TermParser::new() 
+ .parse_strict_compat(id, Lexer::new(s)) + .unwrap() } /// Parse a string representation `long` of a type, and assert that diff --git a/lsp/nls/src/analysis.rs b/lsp/nls/src/analysis.rs index 52fa8fe709..61d96b6f61 100644 --- a/lsp/nls/src/analysis.rs +++ b/lsp/nls/src/analysis.rs @@ -479,8 +479,8 @@ mod tests { let s = "{ field. }"; let file = Files::new().add("", s.to_owned()); - let (rt, _errors) = grammar::ExprParser::new() - .parse_tolerant(file, lexer::Lexer::new(s)) + let (rt, _errors) = grammar::TermParser::new() + .parse_tolerant_compat(file, lexer::Lexer::new(s)) .unwrap(); let parent = ParentLookup::new(&rt); diff --git a/lsp/nls/src/position.rs b/lsp/nls/src/position.rs index 6abc9e57d5..477144476a 100644 --- a/lsp/nls/src/position.rs +++ b/lsp/nls/src/position.rs @@ -216,8 +216,8 @@ pub(crate) mod tests { pub fn parse(s: &str) -> (FileId, RichTerm) { let id = Files::new().add("", String::from(s)); - let term = grammar::ExprParser::new() - .parse_strict(id, lexer::Lexer::new(s)) + let term = grammar::TermParser::new() + .parse_strict_compat(id, lexer::Lexer::new(s)) .unwrap(); (id, term) } From 33735fe45069756e2fed104451046dcdb62b964a Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 20 Nov 2024 18:53:36 +0100 Subject: [PATCH 14/23] [Maybe to drop?] Fix failing test (symbolic string being recursive records) --- core/src/parser/tests.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/core/src/parser/tests.rs b/core/src/parser/tests.rs index 0c7098b54f..2ea3a00d87 100644 --- a/core/src/parser/tests.rs +++ b/core/src/parser/tests.rs @@ -1,5 +1,4 @@ use super::lexer::{Lexer, MultiStringToken, NormalToken, StringToken, SymbolicStringStart, Token}; -use super::utils::{build_record, FieldPathElem}; use crate::error::ParseError; use crate::files::Files; use crate::identifier::LocIdent; @@ -38,9 +37,9 @@ fn mk_single_chunk(s: &str) -> RichTerm { } fn mk_symbolic_single_chunk(prefix: &str, s: &str) -> RichTerm { - use crate::term::{make::builder, record::Field}; + use crate::term::{make::builder, SharedTerm}; - builder::Record::new() + let mut result: RichTerm = builder::Record::new() .field("tag") .value(Term::Enum("SymbolicString".into())) .field("prefix") @@ -50,7 +49,26 @@ fn mk_symbolic_single_chunk(prefix: &str, s: &str) -> RichTerm { std::iter::once(mk_single_chunk(s)).collect(), Default::default(), )) - .into() + .into(); + + // The builder interface is nice, but it produces non recursive records. Since the new AST + // symbolic string chunks produce recursive records (they're not really recursive, but there's + // no distinction in the source syntax, and it gets translated to a `RecRecord` by default). + // + // We hack around it by "peeling off" the outer record layer and replacing it with a recursive + // record. 
+ + let term_mut = SharedTerm::make_mut(&mut result.term); + let content = std::mem::replace(term_mut, Term::Null); + + if let Term::Record(data) = content { + *term_mut = RecRecord(data, Vec::new(), None); + result + } else { + unreachable!( + "record was built using Record::builder, expected a record term, got something else" + ) + } } #[test] From fcb29c213d1f54d4d4fcea0bd5502cd51a1193e7 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 21 Nov 2024 10:25:50 +0100 Subject: [PATCH 15/23] Fix swapped seal/unseal --- core/src/parser/grammar.lalrpop | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index a4f9eff2ac..e5882319e7 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -1367,8 +1367,8 @@ BOpPre: PrimOp = { "contract/check" => PrimOp::ContractCheck, "contract/array_lazy_app" => PrimOp::ContractArrayLazyApp, "contract/record_lazy_app" => PrimOp::ContractRecordLazyApp, - "unseal" => PrimOp::Seal, - "seal" => PrimOp::Unseal, + "seal" => PrimOp::Seal, + "unseal" => PrimOp::Unseal, "label/go_field" => PrimOp::LabelGoField, "record/has_field" => PrimOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), "record/has_field_with_opts" => PrimOp::RecordHasField(RecordOpKind::ConsiderAllFields), From ed6d0bcec9ec8aa5764ae8ce7d0138e37b045979 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 21 Nov 2024 10:26:47 +0100 Subject: [PATCH 16/23] Fix missing position for elaborated merge (piecewise defs) --- core/src/parser/utils.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 70c4b4aca3..a31ebef600 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -91,8 +91,9 @@ pub enum FieldPathElem<'ast> { Ident(LocIdent), /// A quoted field declaration: `{ "%{protocol}" = .. }` /// - /// In practice, the argument must always be `StringChunks`, but since we also need to keep track - /// of the associated span it's handier to just use a `RichTerm`. + /// In practice, the argument must always [crate::bytecode::ast::StringChunks], but since we + /// also need to keep track of the associated span it's handier to just use an + /// [crate::bytecode::ast]. Expr(Ast<'ast>), } @@ -535,9 +536,8 @@ fn merge_fields<'ast>( field1: Field<'ast>, field2: Field<'ast>, ) -> Field<'ast> { - // FIXME: We're duplicating a lot of the logic in - // [`eval::merge::merge_fields`] but not quite enough to actually factor - // it out + // FIXME: We're duplicating a lot of the logic in [`eval::merge::merge_fields`] but not quite + // enough to actually factor it out fn merge_values<'ast>( alloc: &'ast AstAlloc, id_span: RawSpan, @@ -604,8 +604,13 @@ fn merge_fields<'ast>( }, ], ), - // cf [^record-elaboration-position] - pos: TermPos::None, + // We don't have a very good position here either (see + // [^record-elaboration-position]). However, as long as we convert the new AST to + // the mainline `term::Term` representation, we will need to set a span (and not + // just a position) for the merge label. Previously, we would use `id_span`. So we + // set `id_span` as a position so that the conversion code of + // `bytecode::ast::compat` can retrieve it and put in the merge label accordingly. 
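+                // (Concretely, the mainline merge label is a `MergeLabel { span, kind }`, and
+                // `span` is a `RawSpan`: there is no `None` variant to fall back on there.)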
+ pos: id_span.into(), }, } } From 440a827c297ed8cc9f624a970a2a6b8a7afd1003 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 21 Nov 2024 11:39:30 +0100 Subject: [PATCH 17/23] Remove FieldDef and record elaboration from parser --- core/src/parser/utils.rs | 123 ++------------------------------------- 1 file changed, 5 insertions(+), 118 deletions(-) diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index a31ebef600..30bc8727b6 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -12,10 +12,11 @@ use indexmap::{map::Entry, IndexMap}; use super::error::ParseError; use crate::{ - combine::Combine, - eval::{ - merge::{merge_doc, split}, - operation::RecPriority, + app, + bytecode::ast::{ + pattern::bindings::Bindings as _, + record::{Field, FieldMetadata, FieldDef}, + *, }, cache::InputFormat, combine::CombineAlloc, @@ -84,21 +85,6 @@ pub enum StringEndDelimiter { Special, } -/// Left hand side of a record field declaration. -#[derive(Clone, Debug)] -pub enum FieldPathElem<'ast> { - /// A static field declaration: `{ foo = .. }` - Ident(LocIdent), - /// A quoted field declaration: `{ "%{protocol}" = .. }` - /// - /// In practice, the argument must always [crate::bytecode::ast::StringChunks], but since we - /// also need to keep track of the associated span it's handier to just use an - /// [crate::bytecode::ast]. - Expr(Ast<'ast>), -} - -pub type FieldPath<'ast> = Vec>; - /// A string chunk literal atom, being either a string or a single char. /// /// Because of the way the lexer handles escaping and interpolation, a contiguous static string @@ -109,105 +95,6 @@ pub enum ChunkLiteralPart { Char(char), } -/// A field definition atom. A field is defined by a path, a potential value, and associated -/// metadata. -#[derive(Clone, Debug)] -pub struct FieldDef<'ast> { - pub path: FieldPath<'ast>, - pub field: Field<'ast>, - pub pos: TermPos, -} - -impl<'ast> FieldDef<'ast> { - /// Elaborate a record field definition specified as a path, like `a.b.c = foo`, into a regular - /// flat definition `a = {b = {c = foo}}`. - /// - /// # Preconditions - /// - /!\ path must be **non-empty**, otherwise this function panics - pub fn elaborate(self, alloc: &'ast AstAlloc) -> (FieldPathElem<'ast>, Field<'ast>) { - let mut it = self.path.into_iter(); - let fst = it.next().unwrap(); - - let content = it.rev().fold(self.field, |acc, path_elem| { - // We first compute a position for the intermediate generated records (it's useful - // in particular for the LSP). The position starts at the subpath corresponding to - // the intermediate record and ends at the final value. - // - // unwrap is safe here becuase the initial content has a position, and we make sure - // we assign a position for the next field. - let pos = match path_elem { - FieldPathElem::Ident(id) => id.pos, - FieldPathElem::Expr(ref expr) => expr.pos, - }; - // unwrap is safe here because every id should have a non-`TermPos::None` position - let id_span = pos.unwrap(); - let acc_span = acc - .value - .as_ref() - .map(|value| value.pos.unwrap()) - .unwrap_or(id_span); - - // `RawSpan::fuse` only returns `None` when the two spans are in different files. - // A record field and its value *must* be in the same file, so this is safe. 
- let pos = TermPos::Original(id_span.fuse(acc_span).unwrap()); - - match path_elem { - FieldPathElem::Ident(id) => Field::from(Ast { - node: Node::Record(alloc.record_data( - iter::once((id, acc)), - iter::empty(), - false, - )), - pos, - }), - FieldPathElem::Expr(exp) => { - let static_access = exp.node.try_str_chunk_as_static_str(); - - if let Some(static_access) = static_access { - let id = LocIdent::new_with_pos(static_access, exp.pos); - Field::from(Ast { - node: Node::Record(alloc.record_data( - iter::once((id, acc)), - iter::empty(), - false, - )), - pos, - }) - } else { - // The record we create isn't recursive, because it is only comprised of - // one dynamic field. It's just simpler to use the infrastructure of - // `RecRecord` to handle dynamic fields at evaluation time rather than - // right here - Field::from(Ast { - node: Node::Record(alloc.record_data( - std::iter::empty(), - std::iter::once((exp, acc)), - false, - )), - pos, - }) - } - } - } - }); - - (fst, content) - } - - /// Returns the identifier corresponding to this definition if the path is composed of exactly - /// one element which is a static identifier. Returns `None` otherwise. - pub fn path_as_ident(&self) -> Option { - if self.path.len() > 1 { - return None; - } - - self.path.first().and_then(|path_elem| match path_elem { - FieldPathElem::Expr(_) => None, - FieldPathElem::Ident(ident) => Some(*ident), - }) - } -} - /// The last field of a record, that can either be a normal field declaration or an ellipsis. #[derive(Clone, Debug)] pub enum RecordLastField<'ast> { From a77046586ace63a1f79d08a05a0e7dd5688bfbfe Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 22 Nov 2024 17:42:33 +0100 Subject: [PATCH 18/23] Fix compilation error after rebase --- core/src/bytecode/ast/compat.rs | 69 +++++----- core/src/bytecode/ast/mod.rs | 12 +- core/src/bytecode/ast/record.rs | 20 +-- core/src/parser/grammar.lalrpop | 45 +++---- core/src/parser/uniterm.rs | 74 ++++++----- core/src/parser/utils.rs | 217 ++------------------------------ core/src/term/mod.rs | 30 +++-- 7 files changed, 127 insertions(+), 340 deletions(-) diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index f689f4ed34..47d213b01b 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -353,26 +353,21 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> { } })); - field_defs.extend( - dyn_fields - .iter() - .map(|(expr, field)| { - let pos_field_name = expr.pos; - let pos = field.value.as_ref().map(|v| pos_field_name.fuse(v.pos)).unwrap_or(pos_field_name); - - if let Node::StrChunks(chunks) = Ast::from_mainline(alloc, expr).node { - record::FieldDef { - path: record::FieldPathElem::single_expr_path(alloc, chunks, pos_field_name), - metadata: field.metadata.to_ast(alloc), - value: field.value.as_ref().map(|term| term.to_ast(alloc)), - pos, - } - } - else { - panic!("expected string chunks to be the only valid option for a dynamic field, but got something else") - } - }) - ); + field_defs.extend(dyn_fields.iter().map(|(expr, field)| { + let pos_field_name = expr.pos; + let pos = field + .value + .as_ref() + .map(|v| pos_field_name.fuse(v.pos)) + .unwrap_or(pos_field_name); + + record::FieldDef { + path: record::FieldPathElem::single_expr_path(alloc, expr.to_ast(alloc)), + metadata: field.metadata.to_ast(alloc), + value: field.value.as_ref().map(|term| term.to_ast(alloc)), + pos, + } + })); alloc.record(Record { field_defs: alloc.alloc_iter(field_defs), @@ -832,11 +827,11 @@ impl<'ast> 
FromAst> for term::TypeAnnotation { } } -impl<'ast> FromAst>> for term::StrChunk { - fn from_ast(chunk: &StrChunk>) -> Self { +impl<'ast> FromAst>> for term::StrChunk { + fn from_ast(chunk: &StringChunk>) -> Self { match chunk { - StrChunk::Literal(s) => term::StrChunk::Literal(s.clone()), - StrChunk::Expr(expr, indent) => term::StrChunk::Expr(expr.to_mainline(), *indent), + StringChunk::Literal(s) => term::StrChunk::Literal(s.clone()), + StringChunk::Expr(expr, indent) => term::StrChunk::Expr(expr.to_mainline(), *indent), } } } @@ -845,17 +840,14 @@ impl<'ast> FromAst>> for term::StrChunk { /// or a quoted identifier. pub enum FieldName { Ident(LocIdent), - Expr(Vec>, TermPos), + Expr(term::RichTerm), } impl FromAst> for FieldName { fn from_ast(elem: &record::FieldPathElem<'_>) -> Self { match elem { record::FieldPathElem::Ident(id) => FieldName::Ident(*id), - record::FieldPathElem::Expr(chunks, pos) => { - let chunks = chunks.iter().map(ToMainline::to_mainline).collect(); - FieldName::Expr(chunks, *pos) - } + record::FieldPathElem::Expr(node) => FieldName::Expr(node.to_mainline()), } } } @@ -900,11 +892,10 @@ impl<'ast> FromAst> for (FieldName, term::record::Field) pos, )) } - FieldPathElem::Expr(chunks, pos) => { - let pos = *pos; - let chunks: Vec<_> = chunks.iter().map(|chunk| chunk.to_mainline()).collect(); - let exp = term::RichTerm::new(term::Term::StrChunks(chunks), pos); - let static_access = exp.as_ref().try_str_chunk_as_static_str(); + FieldPathElem::Expr(expr) => { + let pos = expr.pos; + let expr = term::RichTerm::from_ast(expr); + let static_access = expr.as_ref().try_str_chunk_as_static_str(); if let Some(static_access) = static_access { let id = LocIdent::new_with_pos(static_access, pos); @@ -926,7 +917,7 @@ impl<'ast> FromAst> for (FieldName, term::record::Field) term::record::Field::from(term::RichTerm::new( term::Term::RecRecord( term::record::RecordData::empty(), - vec![(exp, acc)], + vec![(expr, acc)], None, ), pos, @@ -1432,7 +1423,8 @@ impl<'ast> FromAst> for def in record.field_defs.iter().map(ToMainline::to_mainline) { match def { (FieldName::Ident(id), field) => insert_static_field(&mut static_fields, id, field), - (FieldName::Expr(e, pos), field) => { + (FieldName::Expr(expr), field) => { + let pos = expr.pos; // Dynamic fields (whose name is defined by an interpolated string) have a different // semantics than fields whose name can be determined statically. However, static // fields with special characters are also parsed as string chunks: @@ -1444,8 +1436,7 @@ impl<'ast> FromAst> // Here, both fields are parsed as `StrChunks`, but the first field is actually a // static one, just with special characters. The following code determines which fields // are actually static or not, and inserts them in the right location. 
- let rt = term::RichTerm::new(term::Term::StrChunks(e), pos); - let static_access = rt.term.as_ref().try_str_chunk_as_static_str(); + let static_access = expr.term.as_ref().try_str_chunk_as_static_str(); if let Some(static_access) = static_access { insert_static_field( @@ -1454,7 +1445,7 @@ impl<'ast> FromAst> field, ) } else { - dynamic_fields.push((rt, field)); + dynamic_fields.push((expr, field)); } } } diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs index 1f070a0896..5860878421 100644 --- a/core/src/bytecode/ast/mod.rs +++ b/core/src/bytecode/ast/mod.rs @@ -204,17 +204,7 @@ impl<'ast> Node<'ast> { /// chunks are [StringChunk::Literal] pub fn try_str_chunk_as_static_str(&self) -> Option { match self { - Node::StringChunks(chunks) => { - chunks - .iter() - .try_fold(String::new(), |mut acc, next| match next { - StringChunk::Literal(lit) => { - acc.push_str(lit); - Some(acc) - } - _ => None, - }) - } + Node::StringChunks(chunks) => StringChunk::try_chunks_as_static_str(*chunks), _ => None, } } diff --git a/core/src/bytecode/ast/record.rs b/core/src/bytecode/ast/record.rs index 7d67648d52..e7dc573649 100644 --- a/core/src/bytecode/ast/record.rs +++ b/core/src/bytecode/ast/record.rs @@ -1,4 +1,4 @@ -use super::{Annotation, Ast, AstAlloc, StrChunk}; +use super::{Annotation, Ast, AstAlloc}; use crate::{identifier::LocIdent, position::TermPos}; @@ -13,8 +13,12 @@ use std::rc::Rc; pub enum FieldPathElem<'ast> { /// A statically known identifier. Ident(LocIdent), - /// A dynamic field name written as a quoted expression, e.g. `"%{protocol}" = .. `. - Expr(&'ast [StrChunk>], TermPos), + /// A dynamic field name written as a quoted expression, e.g. `"%{protocol}" = .. `. Normally, + /// the expression must be a [crate::bytecode::ast::Node::StringChunk], so we could store the + /// chunks directly which would be more precise. However, it's useful to keep a general + /// [crate::bytecode::ast::Ast] to store errors when part of the field path failed to parse + /// correctly. + Expr(Ast<'ast>), } impl<'ast> FieldPathElem<'ast> { @@ -22,7 +26,7 @@ impl<'ast> FieldPathElem<'ast> { pub fn pos(&self) -> TermPos { match self { FieldPathElem::Ident(ident) => ident.pos, - FieldPathElem::Expr(_, pos) => *pos, + FieldPathElem::Expr(expr) => expr.pos, } } @@ -35,12 +39,8 @@ impl<'ast> FieldPathElem<'ast> { } /// Crate a path composed of a single dynamic expression. 
- pub fn single_expr_path( - alloc: &'ast AstAlloc, - expr: &'ast [StrChunk>], - pos: TermPos, - ) -> &'ast [FieldPathElem<'ast>] { - alloc.alloc_iter(std::iter::once(FieldPathElem::Expr(expr, pos))) + pub fn single_expr_path(alloc: &'ast AstAlloc, expr: Ast<'ast>) -> &'ast [FieldPathElem<'ast>] { + alloc.alloc_iter(std::iter::once(FieldPathElem::Expr(expr))) } } diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index e5882319e7..705fd37f53 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -58,7 +58,7 @@ use crate::{ combine::CombineAlloc, bytecode::ast::{ *, - record::{Field, FieldMetadata}, + record::{FieldMetadata, FieldDef, FieldPathElem}, pattern::*, typ::*, primop::{PrimOp, RecordOpKind}, @@ -467,20 +467,18 @@ RecordField: FieldDef<'ast> = { = { +FieldPath: Vec> = { ".")*> => { elems.push(last); elems @@ -1047,24 +1045,15 @@ StringChunks: Node<'ast> = { StringChunk::Expr(e, _) => e, }); - build_record( - alloc, - [ - ( - FieldPathElem::Ident("tag".into()), - Field::from(Ast::from(builder::enum_tag("SymbolicString"))) - ), - ( - FieldPathElem::Ident("prefix".into()), - Field::from(Ast::from(builder::enum_tag(prefix))) - ), - ( - FieldPathElem::Ident("fragments".into()), - Field::from(Ast::from(alloc.array(terms))) - ) - ], - Default::default() - ) + builder::Record::new() + .field("tag") + .value(alloc, builder::enum_tag("SymbolicString")) + .field("prefix") + .value(alloc, builder::enum_tag(prefix)) + .field("fragments") + .value(alloc, alloc.array(terms)) + .build(alloc) + .node } else { alloc.string_chunks(chunks) } diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 6b0bd48f9b..e6e23dd381 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -2,11 +2,11 @@ use super::{error::InvalidRecordTypeError, *}; use error::ParseError; use indexmap::{map::Entry, IndexMap}; -use utils::{build_record, FieldDef, FieldPathElem}; use crate::{ bytecode::ast::{ - record::{Field, FieldMetadata}, + self, + record::{FieldDef, FieldMetadata, FieldPathElem}, typ::{EnumRow, EnumRows, RecordRow, RecordRows, Type}, Annotation, Ast, AstAlloc, MergePriority, Node, }, @@ -266,8 +266,9 @@ impl<'ast> UniRecord<'ast> { let first_without_def = self.fields.iter().find_map(|field_def| { let path_as_ident = field_def.path_as_ident(); - match &field_def.field { - Field { + match &field_def { + FieldDef { + path: _, value: None, metadata: FieldMetadata { @@ -301,8 +302,8 @@ impl<'ast> UniRecord<'ast> { Some((field_def.pos.unwrap(), typ.pos.unwrap())) } } - field => { - if let (Some(ident), Some(_)) = (path_as_ident, &field.value) { + field_def => { + if let (Some(ident), Some(_)) = (path_as_ident, &field_def.value) { candidate_fields.insert(ident.ident(), FieldState::Defined); } @@ -338,8 +339,8 @@ impl<'ast> UniRecord<'ast> { field_def.path.len() == 1 // Warning: this pattern must stay in sync with the // corresponding pattern in `into_type_strict`. - && matches!(&field_def.field, - Field { + && matches!(&field_def, + FieldDef { value: None, metadata: FieldMetadata { @@ -353,6 +354,7 @@ impl<'ast> UniRecord<'ast> { not_exported: false, priority: MergePriority::Neutral, }, + .. } if contracts.is_empty()) }) } @@ -370,12 +372,11 @@ impl<'ast> UniRecord<'ast> { field_def: FieldDef<'ast>, tail: RecordRows<'ast>, ) -> Result, InvalidRecordTypeError> { - // At parsing stage, all `Rc`s must be 1-counted. We can thus call - // `into_owned()` without risking to actually clone anything. 
- match field_def.field { + match field_def { // Warning: this pattern must stay in sync with the corresponding pattern in // `is_record_type`. - Field { + FieldDef { + path: _, value: None, metadata: FieldMetadata { @@ -389,6 +390,7 @@ impl<'ast> UniRecord<'ast> { not_exported: false, priority: MergePriority::Neutral, }, + pos: _, } if contracts.is_empty() => Ok(RecordRows(RecordRowsF::Extend { row: RecordRow { id, @@ -427,33 +429,32 @@ impl<'ast> UniRecord<'ast> { self.tail .map(|(tail, _)| tail) .unwrap_or(RecordRows(RecordRowsF::Empty)), - |acc: RecordRows, mut field_def| { + |acc: RecordRows, field_def| { // We don't support compound paths for types, yet. // All positions can be unwrapped because we're still parsing. if field_def.path.len() > 1 { let span = field_def .path .into_iter() - .map(|path_elem| match path_elem { - FieldPathElem::Ident(id) => id.pos.unwrap(), - FieldPathElem::Expr(rt) => rt.pos.unwrap(), - }) + .map(|path_elem| path_elem.pos().unwrap()) .reduce(|acc, span| acc.fuse(span).unwrap_or(acc)) // We already checked that the path is non-empty. .unwrap(); Err(InvalidRecordTypeError::InvalidField(span)) } else { - let elem = field_def.path.pop().unwrap(); + let elem = field_def.path.last().unwrap(); + let id = match elem { - FieldPathElem::Ident(id) => id, + FieldPathElem::Ident(id) => *id, FieldPathElem::Expr(expr) => { + let pos = expr.pos; let name = expr.node.try_str_chunk_as_static_str().ok_or( InvalidRecordTypeError::InterpolatedField( field_def.pos.unwrap(), ), )?; - LocIdent::new_with_pos(name, expr.pos) + LocIdent::new_with_pos(name, pos) } }; if let Some(prev_id) = fields_seen.insert(id.ident(), id) { @@ -486,11 +487,13 @@ impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Ast<'ast> { /// Convert a `UniRecord` to a term. If the `UniRecord` is syntactically a record type or it /// has a tail, it is first interpreted as a type and then wrapped in a `Term::Type`. One /// exception is the empty record, which behaves the same both as a type and a contract, and - /// turning an empty record literal to an opaque function would break everything. + /// turning an empty record literal to an opaque contract would break everything, so the empty + /// record is always interpreted as a term directly. /// - /// Otherwise it is interpreted as a record directly. Fail if the `UniRecord` has a tail but - /// isn't syntactically a record type either. Elaborate field paths `foo.bar = value` to the - /// expanded form `{foo = {bar = value}}`. + /// If the unirecord isn't a record type and doesn't have a tail, it is interpreted as an + /// equivalent record term. Fail if the `UniRecord` has a tail but isn't syntactically a record + /// type either. Elaborate field paths `foo.bar = value` to the expanded form `{foo = {bar = + /// value}}`. /// /// We also fix the type variables of the type appearing inside annotations (see in-code /// documentation of the private symbol `FixTypeVars::fix_type_vars`). @@ -516,16 +519,27 @@ impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Ast<'ast> { ur.check_typed_field_without_def()?; let UniRecord { fields, open, .. 
} = ur; - let elaborated = fields + + let field_defs_fixed = fields .into_iter() - .map(|mut field_def| { - field_def.field.metadata = - fix_field_types(alloc, field_def.field.metadata, field_def.pos.unwrap())?; - Ok(field_def.elaborate(alloc)) + .map(|field_def| { + Ok(FieldDef { + metadata: fix_field_types( + alloc, + field_def.metadata, + field_def.pos.unwrap(), + )?, + ..field_def + }) }) .collect::, _>>()?; - Ok(build_record(alloc, elaborated, open).spanned(pos)) + Ok(alloc + .record(ast::record::Record { + field_defs: alloc.alloc_iter(field_defs_fixed), + open, + }) + .spanned(pos)) } } } diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 30bc8727b6..b6c6d0ace1 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -7,24 +7,22 @@ use std::{ {collections::HashSet, fmt::Debug}, }; -use indexmap::{map::Entry, IndexMap}; - use super::error::ParseError; use crate::{ app, bytecode::ast::{ pattern::bindings::Bindings as _, - record::{Field, FieldMetadata, FieldDef}, + record::{FieldDef, FieldMetadata}, *, }, cache::InputFormat, combine::CombineAlloc, - eval::merge::{merge_doc, split}, + eval::merge::merge_doc, files::FileId, fun, identifier::LocIdent, - label::{Label, MergeKind, MergeLabel}, + label::{Label, MergeLabel}, position::{RawSpan, TermPos}, primop_app, typ::Type, @@ -271,15 +269,6 @@ impl<'ast> CombineAlloc<'ast> for FieldMetadata<'ast> { } } -impl<'ast> AttachToAst<'ast, Field<'ast>> for FieldMetadata<'ast> { - fn attach_to_ast(self, _alloc: &'ast AstAlloc, ast: Ast<'ast>) -> Field<'ast> { - Field { - value: Some(ast), - metadata: self, - } - } -} - impl<'ast> CombineAlloc<'ast> for LetMetadata<'ast> { /// Combine two let metadata into one. Same as `FieldMetadata::combine` but restricted to the /// metadata that can be associated to a let block. @@ -345,198 +334,6 @@ pub fn mk_access<'ast>(alloc: &'ast AstAlloc, access: Ast<'ast>, root: Ast<'ast> } } -/// Build a record from a list of field definitions. If a field is defined several times, the -/// different definitions are merged. -pub fn build_record<'ast, I>(alloc: &'ast AstAlloc, fields: I, open: bool) -> Node<'ast> -where - I: IntoIterator, Field<'ast>)> + Debug, -{ - use indexmap::IndexMap; - - // We keep a hashmap to make it faster to merge fields with the same identifier. - let mut static_fields = IndexMap::new(); - let mut dynamic_fields = Vec::new(); - - fn insert_static_field<'ast>( - alloc: &'ast AstAlloc, - static_fields: &mut IndexMap>, - id: LocIdent, - field: Field<'ast>, - ) { - match static_fields.entry(id) { - Entry::Occupied(mut occpd) => { - // temporarily putting an empty field in the entry to take the previous value. - let prev = occpd.insert(Field::default()); - - // unwrap(): the field's identifier must have a position during parsing. - occpd.insert(merge_fields(alloc, id.pos.unwrap(), prev, field)); - } - Entry::Vacant(vac) => { - vac.insert(field); - } - } - } - - fields.into_iter().for_each(|field| match field { - (FieldPathElem::Ident(id), field) => { - insert_static_field(alloc, &mut static_fields, id, field) - } - (FieldPathElem::Expr(e), field) => { - // Dynamic fields (whose name is defined by an interpolated string) have a different - // semantics than fields whose name can be determined statically. 
However, static - // fields with special characters are also parsed as string chunks: - // - // ``` - // let x = "dynamic" in {"I%am.static" = false, "%{x}" = true} - // ``` - // - // Here, both fields are parsed as `StringChunks`, but the first field is actually a - // static one, just with special characters. The following code determines which fields - // are actually static or not, and inserts them in the right location. - let static_access = e.node.try_str_chunk_as_static_str(); - - if let Some(static_access) = static_access { - insert_static_field( - alloc, - &mut static_fields, - LocIdent::new_with_pos(static_access, e.pos), - field, - ) - } else { - dynamic_fields.push((e, field)); - } - } - }); - - Node::Record(alloc.record_data(static_fields, dynamic_fields, open)) -} - -/// Merge two fields by performing the merge of both their value (dynamically if -/// necessary, by introducing a merge operator) and their metadata (statically). -/// -/// If the values of both fields are records, their merge is computed statically. This prevents -/// building terms whose depth is linear in the number of fields if partial definitions are -/// involved. This manifested in https://github.com/tweag/nickel/issues/1427. -fn merge_fields<'ast>( - alloc: &'ast AstAlloc, - id_span: RawSpan, - field1: Field<'ast>, - field2: Field<'ast>, -) -> Field<'ast> { - // FIXME: We're duplicating a lot of the logic in [`eval::merge::merge_fields`] but not quite - // enough to actually factor it out - fn merge_values<'ast>( - alloc: &'ast AstAlloc, - id_span: RawSpan, - t1: Ast<'ast>, - t2: Ast<'ast>, - ) -> Ast<'ast> { - match (t1.node, t2.node) { - // We don't handle the case of record with dynamic fields, as merging statically and - // dynamically won't have the same semantics if a dynamic field has the same name as - // one of the field of the other record (merging statically will error out, while - // merging dynamically will properly merge their values). - // - // This wasn't handled before the move to the new ast (RFC007) either anyway. - (Node::Record(rd1), Node::Record(rd2)) - if rd1.dyn_fields.is_empty() && rd2.dyn_fields.is_empty() => - { - // We collect fields into temporary hashmaps to easily compute the split. - let left_hashed: IndexMap> = rd1 - .stat_fields - .iter() - .map(|(id, field)| (*id, field.clone())) - .collect(); - let right_hashed: IndexMap> = rd2 - .stat_fields - .iter() - .map(|(id, field)| (*id, field.clone())) - .collect(); - let split::SplitResult { - left, - center, - right, - } = split::split(left_hashed, right_hashed); - - let mut fields = Vec::with_capacity(left.len() + center.len() + right.len()); - fields.extend(left); - fields.extend(right); - for (id, (field1, field2)) in center.into_iter() { - fields.push((id, merge_fields(alloc, id_span, field1, field2))); - } - - Ast { - node: Node::Record(alloc.record_data( - fields, - std::iter::empty(), - rd1.open || rd2.open, - )), - //[^record-elaboration-position]: we don't really have a good position to put here. In the end, maybe we - //should keep `TermPos` in `Ast` as long as the parser has to do some of the - //desugaring. - pos: TermPos::None, - } - } - (node1, node2) => Ast { - node: alloc.prim_op( - primop::PrimOp::Merge(MergeKind::Standard), - [ - Ast { - node: node1, - pos: t1.pos, - }, - Ast { - node: node2, - pos: t2.pos, - }, - ], - ), - // We don't have a very good position here either (see - // [^record-elaboration-position]). 
However, as long as we convert the new AST to - // the mainline `term::Term` representation, we will need to set a span (and not - // just a position) for the merge label. Previously, we would use `id_span`. So we - // set `id_span` as a position so that the conversion code of - // `bytecode::ast::compat` can retrieve it and put in the merge label accordingly. - pos: id_span.into(), - }, - } - } - - let (value, priority) = match (field1.value, field2.value) { - (Some(t1), Some(t2)) if field1.metadata.priority == field2.metadata.priority => ( - Some(merge_values(alloc, id_span, t1, t2)), - field1.metadata.priority, - ), - (Some(t), _) if field1.metadata.priority > field2.metadata.priority => { - (Some(t), field1.metadata.priority) - } - (_, Some(t)) if field1.metadata.priority < field2.metadata.priority => { - (Some(t), field2.metadata.priority) - } - (Some(t), None) => (Some(t), field1.metadata.priority), - (None, Some(t)) => (Some(t), field2.metadata.priority), - (None, None) => (None, Default::default()), - _ => unreachable!(), - }; - - Field { - value, - // [`FieldMetadata::combine`] produces subtly different behaviour from - // the runtime merging code, which is what we need to replicate here - metadata: FieldMetadata { - doc: merge_doc(field1.metadata.doc, field2.metadata.doc), - annotation: CombineAlloc::combine( - alloc, - field1.metadata.annotation, - field2.metadata.annotation, - ), - opt: field1.metadata.opt && field2.metadata.opt, - not_exported: field1.metadata.not_exported || field2.metadata.not_exported, - priority, - }, - } -} - /// Make a span from parser byte offsets. pub fn mk_span(src_id: FileId, l: usize, r: usize) -> RawSpan { RawSpan { @@ -584,16 +381,16 @@ pub fn mk_let<'ast>( for b in &bindings { let new_bindings = b.pattern.bindings(); - for (_path, id, _field) in &new_bindings { - if let Some(old) = seen_bindings.get(id) { + for binding in &new_bindings { + if let Some(old) = seen_bindings.get(&binding.id) { return Err(ParseError::DuplicateIdentInLetBlock { - ident: *id, + ident: binding.id, prev_ident: *old, }); } } - seen_bindings.extend(new_bindings.into_iter().map(|(_path, id, _field)| id)); + seen_bindings.extend(new_bindings.into_iter().map(|binding| binding.id)); } Ok(alloc.let_block(bindings, body, rec)) diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 0b153d9c41..1d115b723a 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -982,11 +982,27 @@ pub enum StrChunk { ), } -#[cfg(test)] impl StrChunk { + #[cfg(test)] pub fn expr(e: E) -> Self { StrChunk::Expr(e, 0) } + + pub fn try_chunks_as_static_str<'a, I>(chunks: I) -> Option + where + I: IntoIterator>, + E: 'a, + { + chunks + .into_iter() + .try_fold(String::new(), |mut acc, next| match next { + StrChunk::Literal(lit) => { + acc.push_str(lit); + Some(acc) + } + _ => None, + }) + } } impl Term { @@ -1208,17 +1224,7 @@ impl Term { /// when the term is a `Term::StrChunk` and all the chunks are `StrChunk::Literal(..)` pub fn try_str_chunk_as_static_str(&self) -> Option { match self { - Term::StrChunks(chunks) => { - chunks - .iter() - .try_fold(String::new(), |mut acc, next| match next { - StrChunk::Literal(lit) => { - acc.push_str(lit); - Some(acc) - } - _ => None, - }) - } + Term::StrChunks(chunks) => StrChunk::try_chunks_as_static_str(chunks), _ => None, } } From 1f8fb29cdf7b479a6554fe8dcc12ad23615cf949 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Fri, 22 Nov 2024 18:44:20 +0100 Subject: [PATCH 19/23] Fix missing field name; dont use generated ident for op curryfication --- 
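Note (kept below the fold so it stays out of the commit message): the "generated ident" part of
this patch replaces `LocIdent::fresh()` with the stable names `x`/`y` in eta-expanded operators,
because pretty-printing a generated identifier doesn't produce valid Nickel. As a minimal sketch
of the resulting shape (using the `fun!`/`app!` builder macros from earlier in this series, with
`alloc` being the current `AstAlloc`), reverse application `|>` now eta-expands to
`fun x y => y x`:

    let fst_arg = LocIdent::from("x");
    let snd_arg = LocIdent::from("y");

    fun!(
        alloc,
        fst_arg,
        snd_arg,
        app!(alloc, builder::var(snd_arg), builder::var(fst_arg)),
    )
    .node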
 core/src/bytecode/ast/compat.rs | 10 +++++++---
 core/src/bytecode/ast/record.rs | 15 +++++++++++++--
 core/src/parser/utils.rs        | 24 ++++++++++++++----------
 core/src/term/record.rs         | 16 ++++++----------
 4 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs
index 47d213b01b..f4361696f5 100644
--- a/core/src/bytecode/ast/compat.rs
+++ b/core/src/bytecode/ast/compat.rs
@@ -861,15 +861,19 @@ impl<'ast> FromAst> for (FieldName, term::record::Field)
     /// - /!\ path must be **non-empty**, otherwise this function panics
     use super::record::FieldPathElem;
 
-        let mut it = field.path.iter();
-        let fst = it.next().unwrap();
+        // unwrap(): field paths must be non-empty
+        let name_innermost = field.path.last().unwrap().try_as_ident();
 
         let initial = term::record::Field {
             value: field.value.as_ref().map(ToMainline::to_mainline),
-            metadata: field.metadata.to_mainline(),
+            metadata: term::record::FieldMetadata::from_ast(&field.metadata)
+                .with_field_name(name_innermost),
             pending_contracts: Vec::new(),
         };
 
+        let mut it = field.path.iter();
+        let fst = it.next().unwrap();
+
         let content = it.rev().fold(initial, |acc, path_elem| {
             // We first compute a position for the intermediate generated records (it's useful
             // in particular for the LSP). The position starts at the subpath corresponding to
diff --git a/core/src/bytecode/ast/record.rs b/core/src/bytecode/ast/record.rs
index e7dc573649..64cb9c4332 100644
--- a/core/src/bytecode/ast/record.rs
+++ b/core/src/bytecode/ast/record.rs
@@ -42,6 +42,17 @@ impl<'ast> FieldPathElem<'ast> {
     pub fn single_expr_path(alloc: &'ast AstAlloc, expr: Ast<'ast>) -> &'ast [FieldPathElem<'ast>] {
         alloc.alloc_iter(std::iter::once(FieldPathElem::Expr(expr)))
     }
+
+    /// Try to interpret this element as a static identifier. Returns `None` if the
+    /// element is an expression with interpolation inside.
+    pub fn try_as_ident(&self) -> Option<LocIdent> {
+        match self {
+            FieldPathElem::Ident(ident) => Some(*ident),
+            FieldPathElem::Expr(expr) => {
+                expr.node.try_str_chunk_as_static_str().map(LocIdent::from)
+            }
+        }
+    }
 }
 
 /// A field definition. A field is defined by a dot-separated path of identifier or interpolated
@@ -62,8 +73,8 @@ impl<'ast> FieldDef<'ast> {
     /// Returns the identifier corresponding to this definition if the path is composed of exactly
     /// one element which is a static identifier. Returns `None` otherwise.
     pub fn path_as_ident(&self) -> Option<LocIdent> {
-        if let [FieldPathElem::Ident(ident)] = self.path {
-            Some(*ident)
+        if let [elem] = self.path {
+            elem.try_as_ident()
         } else {
             None
         }
diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs
index b6c6d0ace1..5a81b8245a 100644
--- a/core/src/parser/utils.rs
+++ b/core/src/parser/utils.rs
@@ -132,6 +132,9 @@ pub(super) struct InfixOp(pub(super) primop::PrimOp);
 
 impl EtaExpand for InfixOp {
     fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_> {
+        // We could use `LocIdent::fresh` for the newly introduced function parameters. However,
+        // it has the issue that pretty printing them doesn't result in valid Nickel anymore. This
+        // is why we prefer normal identifiers like `x` or `y`.
         match self {
             // We treat `UnaryOp::BoolAnd` and `UnaryOp::BoolOr` separately.
             //
             // `). Thus, instead of eta-expanding to `fun x => x` as we would for other
             // unary operators, we eta-expand to `fun x1 x2 => x1 x2`.
InfixOp(op @ primop::PrimOp::BoolAnd) | InfixOp(op @ primop::PrimOp::BoolOr) => { - let fst_arg = LocIdent::fresh(); - let snd_arg = LocIdent::fresh(); + let fst_arg = LocIdent::from("x"); + let snd_arg = LocIdent::from("y"); fun!( alloc, @@ -163,8 +166,8 @@ impl EtaExpand for InfixOp { // version `(.)` has thus reversed argument corresponding to the `RecordGet` primop, so // we need to flip them. InfixOp(op @ primop::PrimOp::RecordGet) => { - let fst_arg = LocIdent::fresh(); - let snd_arg = LocIdent::fresh(); + let fst_arg = LocIdent::new("x"); + let snd_arg = LocIdent::new("y"); fun!( alloc, @@ -176,8 +179,9 @@ impl EtaExpand for InfixOp { .node } InfixOp(op) => { - let vars: Vec<_> = iter::repeat_with(|| LocIdent::fresh()) - .take(op.arity()) + let vars: Vec<_> = (0..op.arity()) + .into_iter() + .map(|i| LocIdent::from(format!("x{i}"))) .collect(); let fun_args: Vec<_> = vars.iter().map(|arg| pattern::Pattern::any(*arg)).collect(); let args: Vec<_> = vars.into_iter().map(builder::var).collect(); @@ -202,8 +206,8 @@ impl EtaExpand for ExtendedInfixOp { fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_> { match self { ExtendedInfixOp::ReverseApp => { - let fst_arg = LocIdent::fresh(); - let snd_arg = LocIdent::fresh(); + let fst_arg = LocIdent::from("x"); + let snd_arg = LocIdent::from("y"); fun!( alloc, @@ -214,8 +218,8 @@ impl EtaExpand for ExtendedInfixOp { .node } ExtendedInfixOp::NotEqual => { - let fst_arg = LocIdent::fresh(); - let snd_arg = LocIdent::fresh(); + let fst_arg = LocIdent::from("x"); + let snd_arg = LocIdent::from("y"); fun!( alloc, diff --git a/core/src/term/record.rs b/core/src/term/record.rs index 6b1a9c9d31..beed017b4b 100644 --- a/core/src/term/record.rs +++ b/core/src/term/record.rs @@ -127,6 +127,12 @@ impl FieldMetadata { && !self.not_exported && matches!(self.priority, MergePriority::Neutral) } + + /// Set the `field_name` attribute of the labels of the type and contracts annotations. + pub fn with_field_name(mut self, name: Option) -> Self { + self.annotation = self.annotation.with_field_name(name); + self + } } impl Combine for FieldMetadata { @@ -230,16 +236,6 @@ impl Field { RecordExtKind::WithoutValue } } - - pub fn with_name(self, field_name: Option) -> Self { - Field { - metadata: FieldMetadata { - annotation: self.metadata.annotation.with_field_name(field_name), - ..self.metadata - }, - ..self - } - } } impl Traverse for Field { From 1274e470d3da1fc38a70d391651c17bf6dfa0621 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 25 Nov 2024 14:53:51 +0100 Subject: [PATCH 20/23] Fix missing position panic, remove unused function --- core/src/parser/uniterm.rs | 8 ++++++-- core/src/parser/utils.rs | 15 ++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index e6e23dd381..5377f782cc 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -96,7 +96,8 @@ where // For nodes such as `Type` or `Record`, the following implementation has to choose between two // positions to use: the one of the wrapping `UniTerm`, and the one stored inside the `RichTerm` or // the `Type`. This implementation assumes that the latest set is the one of `UniTerm`, which is -// the single source of truth. +// the single source of truth. In fact, it happens that only the outermost uniterm position is set +// while the innermost is still `TermPos::None`. 
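+//
+// This is also why the conversion below rebuilds the contract as `Ast { node: ast.node, pos }`
+// rather than reusing `ast` directly: the inner position may still be `TermPos::None`, so we
+// propagate the uniterm's position explicitly.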
impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Type<'ast> { type Error = ParseError; @@ -123,7 +124,10 @@ impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Type<'ast> { return Err(ParseError::InvalidContract(ut.pos.unwrap())); } - TypeF::Contract(alloc.ast(ast)) + TypeF::Contract(alloc.ast(Ast { + node: ast.node, + pos, + })) } }; diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index 5a81b8245a..c3eb73492f 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -233,8 +233,10 @@ impl EtaExpand for ExtendedInfixOp { primop::PrimOp::Eq, builder::var(fst_arg), builder::var(snd_arg), - ), - ), + ) + .with_pos(pos), + ) + .with_pos(pos), ) .node } @@ -360,15 +362,6 @@ pub fn mk_label(typ: Type, src_id: FileId, l: usize, r: usize) -> Label { } } -/// Same as `mk_span`, but for merge labels. The kind is set to the default one -/// (`MergeKind::Standard`). -pub fn mk_merge_label(src_id: FileId, l: usize, r: usize) -> MergeLabel { - MergeLabel { - span: mk_span(src_id, l, r), - kind: Default::default(), - } -} - /// Checks that there are no duplicate bindings in a let block (when bindins are simple, that is /// they aren't pattern), and builds the corresponding let block node if the check passes. pub fn mk_let<'ast>( From 46411046ce73dbe9cec50ce26d62de99f3b5e5b4 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 25 Nov 2024 15:05:27 +0100 Subject: [PATCH 21/23] Add measures for AST conversion --- core/src/parser/mod.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/parser/mod.rs b/core/src/parser/mod.rs index 0b05160520..a6b9225163 100644 --- a/core/src/parser/mod.rs +++ b/core/src/parser/mod.rs @@ -6,6 +6,7 @@ use crate::bytecode::ast::{ use crate::error::{ParseError, ParseErrors}; use crate::files::FileId; use crate::identifier::LocIdent; +use crate::metrics; use crate::position::RawSpan; use lalrpop_util::lalrpop_mod; @@ -212,8 +213,12 @@ macro_rules! generate_compat_impl { lexer: lexer::Lexer, ) -> Result<($output, ParseErrors), ParseError> { let alloc = AstAlloc::new(); - self.parse_tolerant(&alloc, file_id, lexer) - .map(|(t, e)| (t.to_mainline(), e)) + self.parse_tolerant(&alloc, file_id, lexer).map(|(t, e)| { + ( + metrics::measure_runtime!("runtime:ast_conversion", t.to_mainline()), + e, + ) + }) } fn parse_strict_compat( @@ -223,7 +228,7 @@ macro_rules! 
generate_compat_impl { ) -> Result<$output, ParseErrors> { let alloc = AstAlloc::new(); self.parse_strict(&alloc, file_id, lexer) - .map(|t| t.to_mainline()) + .map(|t| metrics::measure_runtime!("runtime:ast_conversion", t.to_mainline())) } } }; @@ -236,7 +241,6 @@ generate_compat_impl!( generate_compat_impl!(grammar::TermParser, crate::term::RichTerm); generate_compat_impl!(grammar::FixedTypeParser, crate::typ::Type); -// We could have implemented ToMainline impl<'ast> ErrorTolerantParserCompat<(Vec, crate::term::RichTerm, RawSpan)> for grammar::CliFieldAssignmentParser { From e86bc0a1f4146314c6e43a83c04c20fb6a1a2f76 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 26 Nov 2024 11:53:35 +0100 Subject: [PATCH 22/23] Fix clippy and cargo doc warnings --- core/src/bytecode/ast/mod.rs | 8 ++++---- core/src/bytecode/ast/record.rs | 2 +- core/src/combine.rs | 8 ++++---- core/src/parser/error.rs | 4 ++-- core/src/parser/grammar.lalrpop | 2 +- core/src/parser/mod.rs | 2 +- core/src/parser/uniterm.rs | 10 +++++----- core/src/parser/utils.rs | 21 ++++++++++----------- 8 files changed, 28 insertions(+), 29 deletions(-) diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs index 5860878421..16b38f73ab 100644 --- a/core/src/bytecode/ast/mod.rs +++ b/core/src/bytecode/ast/mod.rs @@ -357,6 +357,10 @@ impl AstAlloc { self.generic_arena.alloc_slice_fill_iter(iter) } + pub fn alloc_str<'ast>(&'ast self, s: &str) -> &'ast str { + self.generic_arena.alloc_str(s) + } + pub fn node<'ast>(&'ast self, node: Node<'ast>) -> &'ast Node<'ast> { self.generic_arena.alloc(node) } @@ -373,10 +377,6 @@ impl AstAlloc { Node::String(self.generic_arena.alloc_str(s)) } - pub fn string_move<'ast>(&'ast self, s: &str) -> &'_ str { - self.generic_arena.alloc_str(s) - } - pub fn string_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast> where I: IntoIterator>>, diff --git a/core/src/bytecode/ast/record.rs b/core/src/bytecode/ast/record.rs index 64cb9c4332..4b8118a2a1 100644 --- a/core/src/bytecode/ast/record.rs +++ b/core/src/bytecode/ast/record.rs @@ -14,7 +14,7 @@ pub enum FieldPathElem<'ast> { /// A statically known identifier. Ident(LocIdent), /// A dynamic field name written as a quoted expression, e.g. `"%{protocol}" = .. `. Normally, - /// the expression must be a [crate::bytecode::ast::Node::StringChunk], so we could store the + /// the expression must be a [crate::bytecode::ast::Node::StringChunks], so we could store the /// chunks directly which would be more precise. However, it's useful to keep a general /// [crate::bytecode::ast::Ast] to store errors when part of the field path failed to parse /// correctly. diff --git a/core/src/combine.rs b/core/src/combine.rs index c99f3217d6..f508089b79 100644 --- a/core/src/combine.rs +++ b/core/src/combine.rs @@ -11,10 +11,10 @@ pub trait Combine: Default { fn combine(left: Self, right: Self) -> Self; } -/// [combine::Combine] doens't work for new ast nodes, which requires an external allocator to -/// create new nodes. This trait is a version that takes this additional allocator. It's temporary: -/// I suspect we won't need the original general `Combine` trait once we move to the bytecode vm, -/// as [crate::combine::Combine] is used mostly on ast-like data. +/// [Combine] doens't work for new ast nodes, which requires an external allocator to create new +/// nodes. This trait is a version that takes this additional allocator. 
It's temporary: I suspect +/// we won't need the original general [Combine] trait once we move to the bytecode VM, as +/// [Combine] is used mostly on ast-like data, and we will rename [CombineAlloc] to [Combine]. pub trait CombineAlloc<'ast> { fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self; } diff --git a/core/src/parser/error.rs b/core/src/parser/error.rs index 2a05bbc111..834d9844e5 100644 --- a/core/src/parser/error.rs +++ b/core/src/parser/error.rs @@ -21,10 +21,10 @@ pub enum LexicalError { Generic(Range), } -/// Error indicating that a construct is not allowed when trying to interpret an `UniRecord` as a +/// Error indicating that a construct is not allowed when trying to interpret a `UniRecord` as a /// record type in a strict way. /// -/// See [`UniRecord::into_type_strict`](crate::parser::uniterm::UniRecord::into_type_strict). +/// See `parser::uniterm::UniRecord::into_type_strict`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum InvalidRecordTypeError { /// The record type had an invalid field, for example because it had a contract, diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 705fd37f53..c093b25a71 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -719,7 +719,7 @@ ConstantPatternData: ConstantPatternData<'ast> = { // We could accept multiline strings here, but it's unlikely that this will // result in very readable match expressions. For now we restrict ourselves // to standard string; we can always extend to multiline later if needed - StandardStaticString => ConstantPatternData::String(alloc.string_move(&<>)), + StandardStaticString => ConstantPatternData::String(alloc.alloc_str(&<>)), "null" => ConstantPatternData::Null, }; diff --git a/core/src/parser/mod.rs b/core/src/parser/mod.rs index a6b9225163..7d7b762205 100644 --- a/core/src/parser/mod.rs +++ b/core/src/parser/mod.rs @@ -241,7 +241,7 @@ generate_compat_impl!( generate_compat_impl!(grammar::TermParser, crate::term::RichTerm); generate_compat_impl!(grammar::FixedTypeParser, crate::typ::Type); -impl<'ast> ErrorTolerantParserCompat<(Vec, crate::term::RichTerm, RawSpan)> +impl ErrorTolerantParserCompat<(Vec, crate::term::RichTerm, RawSpan)> for grammar::CliFieldAssignmentParser { fn parse_tolerant_compat( diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 5377f782cc..0a49d61dbc 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -388,14 +388,14 @@ impl<'ast> UniRecord<'ast> { annotation: Annotation { typ: Some(typ), - contracts, + contracts: [], }, opt: false, not_exported: false, priority: MergePriority::Neutral, }, pos: _, - } if contracts.is_empty() => Ok(RecordRows(RecordRowsF::Extend { + } => Ok(RecordRows(RecordRowsF::Extend { row: RecordRow { id, typ: alloc.type_data(typ.typ, typ.pos), @@ -439,7 +439,7 @@ impl<'ast> UniRecord<'ast> { if field_def.path.len() > 1 { let span = field_def .path - .into_iter() + .iter() .map(|path_elem| path_elem.pos().unwrap()) .reduce(|acc, span| acc.fuse(span).unwrap_or(acc)) // We already checked that the path is non-empty. @@ -766,7 +766,7 @@ where ) -> Result, ParseError>; } -impl<'ast, 'a> FixTypeVars<'ast> for Type<'ast> { +impl<'ast> FixTypeVars<'ast> for Type<'ast> { fn fix_type_vars_env( &self, alloc: &'ast AstAlloc, @@ -1068,7 +1068,7 @@ pub fn fix_field_types<'ast>( .map(|ctr| { Ok(ctr .fix_type_vars_ref(alloc, span)? 
- .map(|typ| Cow::Owned(typ)) + .map(Cow::Owned) .unwrap_or(Cow::Borrowed(ctr))) }) .collect(); diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs index c3eb73492f..946f8029a1 100644 --- a/core/src/parser/utils.rs +++ b/core/src/parser/utils.rs @@ -22,7 +22,7 @@ use crate::{ files::FileId, fun, identifier::LocIdent, - label::{Label, MergeLabel}, + label::Label, position::{RawSpan, TermPos}, primop_app, typ::Type, @@ -180,7 +180,6 @@ impl EtaExpand for InfixOp { } InfixOp(op) => { let vars: Vec<_> = (0..op.arity()) - .into_iter() .map(|i| LocIdent::from(format!("x{i}"))) .collect(); let fun_args: Vec<_> = vars.iter().map(|arg| pattern::Pattern::any(*arg)).collect(); @@ -393,11 +392,11 @@ pub fn mk_let<'ast>( Ok(alloc.let_block(bindings, body, rec)) } -pub fn mk_import_based_on_filename<'ast>( - alloc: &'ast AstAlloc, +pub fn mk_import_based_on_filename( + alloc: &AstAlloc, path: String, _span: RawSpan, -) -> Result, ParseError> { +) -> Result, ParseError> { let path = OsString::from(path); let format: Option = InputFormat::from_path(std::path::Path::new(path.as_os_str())); @@ -408,12 +407,12 @@ pub fn mk_import_based_on_filename<'ast>( Ok(alloc.import_path(path, format)) } -pub fn mk_import_explicit<'ast>( - alloc: &'ast AstAlloc, +pub fn mk_import_explicit( + alloc: &AstAlloc, path: String, format: LocIdent, span: RawSpan, -) -> Result, ParseError> { +) -> Result, ParseError> { let path = OsString::from(path); let Some(format) = InputFormat::from_tag(format.label()) else { return Err(ParseError::InvalidImportFormat { span }); @@ -428,7 +427,7 @@ pub fn mk_import_explicit<'ast>( /// indentation level of a line is the number of consecutive whitespace characters, which are /// either a space or a tab, counted from the beginning of the line. If a line is empty or consist /// only of whitespace characters, it is ignored. -pub fn min_indent<'ast>(chunks: &[StringChunk>]) -> usize { +pub fn min_indent(chunks: &[StringChunk>]) -> usize { let mut min: usize = usize::MAX; let mut current = 0; let mut start_line = true; @@ -521,12 +520,12 @@ pub fn min_indent<'ast>(chunks: &[StringChunk>]) -> usize { ///not sth /// end" /// ``` -pub fn strip_indent<'ast>(chunks: &mut Vec>>) { +pub fn strip_indent(chunks: &mut [StringChunk>]) { if chunks.is_empty() { return; } - let min = min_indent(&chunks); + let min = min_indent(chunks); let mut current = 0; let mut start_line = true; let chunks_len = chunks.len(); From 72d663d126466daeef30f4ab53fcb1e2db0e703a Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 27 Nov 2024 10:44:48 +0100 Subject: [PATCH 23/23] Update core/src/parser/uniterm.rs Co-authored-by: jneem --- core/src/parser/uniterm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 0a49d61dbc..ddd3ec3157 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -755,7 +755,7 @@ where /// /// Because AST nodes are allocated in an arena and are immutable, they won't be reclaimed /// until the whole AST is finally transformed to either the mainline AST or (in the future) - /// compiled to bytecode. We want to avoid building useless copies of exiting nodes, which is + /// compiled to bytecode. We want to avoid building useless copies of existing nodes, which is /// the reason behind not using a simpler strategy of just always returning a new value, that /// might be identical to the old one if no type variable has been fixed. fn fix_type_vars_env(