diff --git a/core/Cargo.toml b/core/Cargo.toml
index e802fa8d23..39c34617bc 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -28,7 +28,6 @@ doc = ["dep:comrak"]
format = ["dep:topiary-core", "dep:topiary-queries", "dep:tree-sitter-nickel"]
metrics = ["dep:metrics"]
nix-experimental = [ "dep:cxx", "dep:cxx-build", "dep:pkg-config" ]
-bytecode-experimental = ["dep:bumpalo"]
benchmark-ci = []

[build-dependencies]
@@ -87,7 +86,7 @@ tree-sitter-nickel = { workspace = true, optional = true }
metrics = { workspace = true, optional = true }

strsim = "0.10.0"
-bumpalo = { workspace = true, optional = true }
+bumpalo = { workspace = true }

[dev-dependencies]
pretty_assertions.workspace = true
diff --git a/core/src/bytecode/ast/builder.rs b/core/src/bytecode/ast/builder.rs
index a7d58ee02a..521796906f 100644
--- a/core/src/bytecode/ast/builder.rs
+++ b/core/src/bytecode/ast/builder.rs
@@ -309,6 +309,61 @@ impl<'ast> Record<'ast> {
    }
}

+/// Multi-ary application for types implementing `Into<Ast<'ast>>`.
+#[macro_export]
+macro_rules! app {
+    ( $alloc:expr, $f:expr $(, $args:expr )+ $(,)?) => {
+        {
+            let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+];
+
+            $crate::bytecode::ast::Ast::from($alloc.app($crate::bytecode::ast::Ast::from($f), args))
+        }
+    };
+}
+
+#[macro_export]
+/// Multi-ary primop application for types implementing `Into<Ast<'ast>>`.
+macro_rules! primop_app {
+    ( $alloc:expr, $op:expr $(, $args:expr )+ $(,)?) => {
+        {
+            let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+];
+            $crate::bytecode::ast::Ast::from($alloc.prim_op($op, args))
+        }
+    };
+}
+
+#[macro_export]
+/// Multi-argument function for types implementing `Into<LocIdent>` (for the identifiers) and
+/// `Into<Ast<'ast>>` (for the body).
+macro_rules! fun {
+    ( $alloc:expr, $id:expr, $body:expr $(,)?) => {
+        $crate::bytecode::ast::Ast::from(
+            $alloc.fun(
+                $crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($id)),
+                $crate::bytecode::ast::Ast::from($body)
+            )
+        )
+    };
+    ( $alloc:expr, $id1:expr, $id2:expr $(, $rest:expr )+ $(,)?) => {
+        fun!(
+            $alloc,
+            $id1,
+            fun!($alloc, $id2, $( $rest ),+)
+        )
+    };
+}
+
+pub fn var<'ast>(id: impl Into<LocIdent>) -> Ast<'ast> {
+    Ast::from(Node::Var(id.into()))
+}
+
+pub fn enum_tag<'ast>(tag: impl Into<LocIdent>) -> Ast<'ast> {
+    Ast::from(Node::EnumVariant {
+        tag: tag.into(),
+        arg: None,
+    })
+}
+
#[cfg(test)]
mod tests {
    use super::*;
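
A quick usage sketch of the builder macros above (hypothetical: the `AstAlloc::new()` constructor and a `LocIdent: From<&str>` conversion are assumptions based on the surrounding code, not shown in this diff):

    use nickel_lang_core::bytecode::ast::{builder, AstAlloc};
    use nickel_lang_core::{app, fun};

    let alloc = AstAlloc::new(); // assumed constructor
    // Build `fun x y => x y`: the multi-argument form expands to nested `fun`s.
    let body = app!(&alloc, builder::var("x"), builder::var("y"));
    let f = fun!(&alloc, "x", "y", body);
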
diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs
index 1e47eadc53..f4361696f5 100644
--- a/core/src/bytecode/ast/compat.rs
+++ b/core/src/bytecode/ast/compat.rs
@@ -131,7 +131,7 @@ impl<'ast> FromMainline<'ast, term::pattern::ConstantPattern> for PatternData<'a
            term::pattern::ConstantPatternData::Null => ConstantPatternData::Null,
        };

-        PatternData::Constant(alloc.constant_pattern(ConstantPattern {
+        PatternData::Constant(alloc.alloc(ConstantPattern {
            data,
            pos: pattern.pos,
        }))
@@ -270,30 +270,31 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
            Term::Bool(b) => Node::Bool(*b),
            Term::Num(n) => alloc.number(n.clone()),
            Term::Str(s) => alloc.string(s),
-            Term::StrChunks(chunks) => alloc.str_chunks(
-                chunks
-                    .iter()
-                    .map(|chunk| match chunk {
-                        term::StrChunk::Literal(s) => StrChunk::Literal(s.clone()),
-                        term::StrChunk::Expr(expr, indent) => {
-                            StrChunk::Expr(expr.to_ast(alloc), *indent)
-                        }
-                    })
-                    .rev(),
-            ),
+            Term::StrChunks(chunks) => {
+                alloc.string_chunks(chunks.iter().rev().map(|chunk| match chunk {
+                    term::StrChunk::Literal(s) => StringChunk::Literal(s.clone()),
+                    term::StrChunk::Expr(expr, indent) => {
+                        StringChunk::Expr(expr.to_ast(alloc), *indent)
+                    }
+                }))
+            }
            Term::Fun(id, body) => alloc.fun(Pattern::any(*id), body.to_ast(alloc)),
            Term::FunPattern(pat, body) => alloc.fun(pat.to_ast(alloc), body.to_ast(alloc)),
-            Term::Let(bindings, body, attrs) => alloc.let_binding(
-                bindings
-                    .iter()
-                    .map(|(id, term)| (Pattern::any(*id), term.to_ast(alloc))),
+            Term::Let(bindings, body, attrs) => alloc.let_block(
+                bindings.iter().map(|(id, value)| LetBinding {
+                    pattern: Pattern::any(*id),
+                    value: value.to_ast(alloc),
+                    metadata: Default::default(),
+                }),
                body.to_ast(alloc),
                attrs.rec,
            ),
-            Term::LetPattern(bindings, body, attrs) => alloc.let_binding(
-                bindings
-                    .iter()
-                    .map(|(pat, term)| (pat.to_ast(alloc), term.to_ast(alloc))),
+            Term::LetPattern(bindings, body, attrs) => alloc.let_block(
+                bindings.iter().map(|(pat, value)| LetBinding {
+                    pattern: pat.to_ast(alloc),
+                    value: value.to_ast(alloc),
+                    metadata: Default::default(),
+                }),
                body.to_ast(alloc),
                attrs.rec,
            ),
@@ -352,26 +353,21 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
                }
            }));

-            field_defs.extend(
-                dyn_fields
-                    .iter()
-                    .map(|(expr, field)| {
-                        let pos_field_name = expr.pos;
-                        let pos = field.value.as_ref().map(|v| pos_field_name.fuse(v.pos)).unwrap_or(pos_field_name);
-
-                        if let Node::StrChunks(chunks) = Ast::from_mainline(alloc, expr).node {
-                            record::FieldDef {
-                                path: record::FieldPathElem::single_expr_path(alloc, chunks, pos_field_name),
-                                metadata: field.metadata.to_ast(alloc),
-                                value: field.value.as_ref().map(|term| term.to_ast(alloc)),
-                                pos,
-                            }
-                        }
-                        else {
-                            panic!("expected string chunks to be the only valid option for a dynamic field, but got something else")
-                        }
-                    })
-            );
+            field_defs.extend(dyn_fields.iter().map(|(expr, field)| {
+                let pos_field_name = expr.pos;
+                let pos = field
+                    .value
+                    .as_ref()
+                    .map(|v| pos_field_name.fuse(v.pos))
+                    .unwrap_or(pos_field_name);
+
+                record::FieldDef {
+                    path: record::FieldPathElem::single_expr_path(alloc, expr.to_ast(alloc)),
+                    metadata: field.metadata.to_ast(alloc),
+                    value: field.value.as_ref().map(|term| term.to_ast(alloc)),
+                    pos,
+                }
+            }));

            alloc.record(Record {
                field_defs: alloc.alloc_iter(field_defs),
@@ -831,11 +827,11 @@ impl<'ast> FromAst<Annotation<'ast>> for term::TypeAnnotation {
    }
}

-impl<'ast> FromAst<StrChunk<Ast<'ast>>> for term::StrChunk<term::RichTerm> {
-    fn from_ast(chunk: &StrChunk<Ast<'ast>>) -> Self {
+impl<'ast> FromAst<StringChunk<Ast<'ast>>> for term::StrChunk<term::RichTerm> {
+    fn from_ast(chunk: &StringChunk<Ast<'ast>>) -> Self {
        match chunk {
-            StrChunk::Literal(s) => term::StrChunk::Literal(s.clone()),
-            StrChunk::Expr(expr, indent) => term::StrChunk::Expr(expr.to_mainline(), *indent),
+            StringChunk::Literal(s) => term::StrChunk::Literal(s.clone()),
+            StringChunk::Expr(expr, indent) => term::StrChunk::Expr(expr.to_mainline(), *indent),
        }
    }
}
@@ -844,17 +840,14 @@ impl<'ast> FromAst<StringChunk<Ast<'ast>>> for term::StrChunk<term::RichTerm>
/// or a quoted identifier.
pub enum FieldName {
    Ident(LocIdent),
-    Expr(Vec<term::StrChunk<term::RichTerm>>, TermPos),
+    Expr(term::RichTerm),
}

impl FromAst<record::FieldPathElem<'_>> for FieldName {
    fn from_ast(elem: &record::FieldPathElem<'_>) -> Self {
        match elem {
            record::FieldPathElem::Ident(id) => FieldName::Ident(*id),
-            record::FieldPathElem::Expr(chunks, pos) => {
-                let chunks = chunks.iter().map(ToMainline::to_mainline).collect();
-                FieldName::Expr(chunks, *pos)
-            }
+            record::FieldPathElem::Expr(node) => FieldName::Expr(node.to_mainline()),
        }
    }
}
@@ -868,15 +861,19 @@ impl<'ast> FromAst<record::FieldDef<'ast>> for (FieldName, term::record::Field)
    /// - /!\ path must be **non-empty**, otherwise this function panics
    use super::record::FieldPathElem;

-        let mut it = field.path.iter();
-        let fst = it.next().unwrap();
+        // unwrap(): field paths must be non-empty
+        let name_innermost = field.path.last().unwrap().try_as_ident();

        let initial = term::record::Field {
            value: field.value.as_ref().map(ToMainline::to_mainline),
-            metadata: field.metadata.to_mainline(),
+            metadata: term::record::FieldMetadata::from_ast(&field.metadata)
+                .with_field_name(name_innermost),
            pending_contracts: Vec::new(),
        };

+        let mut it = field.path.iter();
+        let fst = it.next().unwrap();
+
        let content = it.rev().fold(initial, |acc, path_elem| {
            // We first compute a position for the intermediate generated records (it's useful
            // in particular for the LSP).
The position starts at the subpath corresponding to @@ -899,11 +896,10 @@ impl<'ast> FromAst> for (FieldName, term::record::Field) pos, )) } - FieldPathElem::Expr(chunks, pos) => { - let pos = *pos; - let chunks: Vec<_> = chunks.iter().map(|chunk| chunk.to_mainline()).collect(); - let exp = term::RichTerm::new(term::Term::StrChunks(chunks), pos); - let static_access = exp.as_ref().try_str_chunk_as_static_str(); + FieldPathElem::Expr(expr) => { + let pos = expr.pos; + let expr = term::RichTerm::from_ast(expr); + let static_access = expr.as_ref().try_str_chunk_as_static_str(); if let Some(static_access) = static_access { let id = LocIdent::new_with_pos(static_access, pos); @@ -925,7 +921,7 @@ impl<'ast> FromAst> for (FieldName, term::record::Field) term::record::Field::from(term::RichTerm::new( term::Term::RecRecord( term::record::RecordData::empty(), - vec![(exp, acc)], + vec![(expr, acc)], None, ), pos, @@ -1196,12 +1192,13 @@ impl<'ast> FromAst> for term::Term { Node::Bool(b) => Term::Bool(*b), Node::Number(n) => Term::Num((**n).clone()), Node::String(s) => Term::Str((*s).into()), - Node::StrChunks(chunks) => { + Node::StringChunks(chunks) => { let chunks = chunks .iter() + .rev() .map(|chunk| match chunk { - StrChunk::Literal(s) => term::StrChunk::Literal(s.clone()), - StrChunk::Expr(expr, indent) => { + StringChunk::Literal(s) => term::StrChunk::Literal(s.clone()), + StringChunk::Expr(expr, indent) => { term::StrChunk::Expr(expr.to_mainline(), *indent) } }) @@ -1218,14 +1215,37 @@ impl<'ast> FromAst> for term::Term { body, rec, } => { + // Mainline term bindings can't have any metadata associated with them. We need to + // rewrite let metadata to be free-standing type and contract annotations instead, + // which is achieved by this helper. + fn with_metadata(metadata: &LetMetadata<'_>, value: &Ast<'_>) -> term::RichTerm { + let value: term::RichTerm = value.to_mainline(); + let pos = value.pos; + + if metadata.annotation.is_empty() { + return value; + } + + term::RichTerm::new( + term::Term::Annotated(metadata.annotation.to_mainline(), value), + pos, + ) + } + // We try to collect all patterns as single identifiers. If this works, we can emit // a simpler / more compact `Let`. let try_bindings = bindings .iter() - .map(|(pat, term)| match pat.data { - PatternData::Any(id) => Some((id, term.to_mainline())), - _ => None, - }) + .map( + |LetBinding { + pattern, + metadata, + value, + }| match pattern.data { + PatternData::Any(id) => Some((id, with_metadata(metadata, value))), + _ => None, + }, + ) .collect::>>(); let body = body.to_mainline(); @@ -1239,30 +1259,33 @@ impl<'ast> FromAst> for term::Term { } else { let bindings = bindings .iter() - .map(|(pat, term)| (pat.to_mainline(), term.to_mainline())) + .map( + |LetBinding { + pattern, + value, + metadata, + }| { + (pattern.to_mainline(), with_metadata(metadata, value)) + }, + ) .collect(); Term::LetPattern(bindings, body, attrs) } } - Node::App { fun, args } => { - // unwrap(): the position of Ast should always be set (we might move to `RawSpan` - // instead of `TermPos` soon) - let fun_span = fun.pos.unwrap(); + Node::App { head: fun, args } => { + let fun_pos = fun.pos; let rterm = args.iter().fold(fun.to_mainline(), |result, arg| { // This case is a bit annoying: we need to extract the position of the sub // application to satisfy the old AST structure, but this information isn't // available directly. 
+                //
                // What we do here is to fuse the span of the term being built and the one of
                // the current argument, which should be a reasonable approximation (if not
                // exactly the same thing).
-                // unwrap(): the position of Ast should always be set (we might move to `RawSpan`
-                // instead of `TermPos` soon)
-                let span_arg = arg.pos.unwrap();
-                let span = fun_span.fuse(span_arg);
-
-                term::RichTerm::new(Term::App(result, arg.to_mainline()), span.into())
+                let arg_pos = arg.pos;
+                term::RichTerm::new(Term::App(result, arg.to_mainline()), fun_pos.fuse(arg_pos))
            });

            rterm.term.into_owned()
@@ -1404,7 +1427,8 @@ impl<'ast> FromAst<record::Record<'ast>> for
        for def in record.field_defs.iter().map(ToMainline::to_mainline) {
            match def {
                (FieldName::Ident(id), field) => insert_static_field(&mut static_fields, id, field),
-                (FieldName::Expr(e, pos), field) => {
+                (FieldName::Expr(expr), field) => {
+                    let pos = expr.pos;
                    // Dynamic fields (whose name is defined by an interpolated string) have a different
                    // semantics than fields whose name can be determined statically. However, static
                    // fields with special characters are also parsed as string chunks:
@@ -1416,8 +1440,7 @@
                    // Here, both fields are parsed as `StrChunks`, but the first field is actually a
                    // static one, just with special characters. The following code determines which fields
                    // are actually static or not, and inserts them in the right location.
-                    let rt = term::RichTerm::new(term::Term::StrChunks(e), pos);
-                    let static_access = rt.term.as_ref().try_str_chunk_as_static_str();
+                    let static_access = expr.term.as_ref().try_str_chunk_as_static_str();

                    if let Some(static_access) = static_access {
                        insert_static_field(
@@ -1426,7 +1449,7 @@
                            field,
                        )
                    } else {
-                        dynamic_fields.push((rt, field));
+                        dynamic_fields.push((expr, field));
                    }
                }
            }
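
Taken together, the compat layer keeps both representations interchangeable during the transition. A sketch of the round trip, using the same `to_ast`/`to_mainline` methods exercised in the hunks above (variable names are illustrative):

    // Mainline -> new AST: converted nodes are moved into the arena behind `alloc`.
    let ast: Ast<'_> = rich_term.to_ast(&alloc);
    // New AST -> mainline: rebuilds the reference-counted RichTerm tree.
    let round_tripped: term::RichTerm = ast.to_mainline();
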
diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs
index ca360b2bb9..16b38f73ab 100644
--- a/core/src/bytecode/ast/mod.rs
+++ b/core/src/bytecode/ast/mod.rs
@@ -1,8 +1,8 @@
-//! The Nickel AST, as ingested by the bytecode compiler.
+//! The Nickel AST, as ingested by the (future) bytecode compiler.
//!
//! Since the AST is built once for each Nickel expression and is then compiled away to bytecode,
-//! the number nodes ever allocated should be reasonably bounded by the input program size. Thus,
-//! for performance reasons, we allocate notes using an arena and keep them alive until the end of
+//! the total number of allocated nodes is reasonably bounded by the input program size. Thus, for
+//! performance reasons, we allocate nodes using an arena and keep them alive until the end of
//! compilation. In return, we get fast allocation and de-allocation, and we can easily reference
//! other nodes and data structures using native references.
//!
@@ -26,7 +26,7 @@ use crate::{
};

// For now, we reuse those types from the term module.
-pub use crate::term::{Number, StrChunk};
+pub use crate::term::{MergePriority, Number, StrChunk as StringChunk};

use bumpalo::Bump;

@@ -42,16 +42,6 @@ use pattern::*;
use primop::PrimOp;
use typ::*;

-/// A Nickel AST. Contains a root node and a span.
-///
-//TODO: we don't expect to access the span much on the happy path. Should we add an indirection
-//through a reference?
-#[derive(Clone, Debug, PartialEq)]
-pub struct Ast<'ast> {
-    node: Node<'ast>,
-    pos: TermPos,
-}
-
/// A node of the Nickel AST.
///
/// Nodes are built by the parser and then mostly traversed immutably. Such nodes are optimized for
@@ -85,7 +75,7 @@ pub enum Node<'ast> {
    ///
    /// As opposed to [crate::term::Term::StrChunks], the chunks are stored in the original order:
    /// `"hello%{var}"` will give `["hello", var]`.
-    StrChunks(&'ast [StrChunk<Ast<'ast>>]),
+    StringChunks(&'ast [StringChunk<Ast<'ast>>]),

    /// A function.
    Fun {
@@ -93,16 +83,16 @@ pub enum Node<'ast> {
        body: &'ast Ast<'ast>,
    },

-    /// A let-binding.
+    /// A let block.
    Let {
-        bindings: &'ast [(Pattern<'ast>, Ast<'ast>)],
+        bindings: &'ast [LetBinding<'ast>],
        body: &'ast Ast<'ast>,
        rec: bool,
    },

    /// An application to one or more arguments.
    App {
-        fun: &'ast Ast<'ast>,
+        head: &'ast Ast<'ast>,
        args: &'ast [Ast<'ast>],
    },

@@ -163,6 +153,85 @@ pub enum Node<'ast> {
    ParseError(&'ast ParseError),
}

+/// An individual binding in a let block.
+#[derive(Debug, Clone, PartialEq)]
+pub struct LetBinding<'ast> {
+    pub pattern: Pattern<'ast>,
+    pub metadata: LetMetadata<'ast>,
+    pub value: Ast<'ast>,
+}
+
+/// The metadata that can be attached to a let. It's a subset of [record::FieldMetadata].
+#[derive(Debug, Default, Clone, PartialEq)]
+pub struct LetMetadata<'ast> {
+    pub doc: Option<Rc<str>>,
+    pub annotation: Annotation<'ast>,
+}
+
+impl<'ast> From<LetMetadata<'ast>> for record::FieldMetadata<'ast> {
+    fn from(let_metadata: LetMetadata<'ast>) -> Self {
+        record::FieldMetadata {
+            annotation: let_metadata.annotation,
+            doc: let_metadata.doc,
+            ..Default::default()
+        }
+    }
+}
+
+impl<'ast> TryFrom<record::FieldMetadata<'ast>> for LetMetadata<'ast> {
+    type Error = ();
+
+    fn try_from(field_metadata: record::FieldMetadata<'ast>) -> Result<Self, Self::Error> {
+        if let record::FieldMetadata {
+            doc,
+            annotation,
+            opt: false,
+            not_exported: false,
+            priority: MergePriority::Neutral,
+        } = field_metadata
+        {
+            Ok(LetMetadata { doc, annotation })
+        } else {
+            Err(())
+        }
+    }
+}
+
+impl<'ast> Node<'ast> {
+    /// Tries to extract a static literal from string chunks.
+    ///
+    /// This method returns a `Some(..)` when the term is a [Node::StringChunks] and all the
+    /// chunks are [StringChunk::Literal].
+    pub fn try_str_chunk_as_static_str(&self) -> Option<String> {
+        match self {
+            Node::StringChunks(chunks) => StringChunk::try_chunks_as_static_str(*chunks),
+            _ => None,
+        }
+    }
+
+    /// Attaches a position to this node, turning it into an [Ast].
+    pub fn spanned(self, pos: TermPos) -> Ast<'ast> {
+        Ast { node: self, pos }
+    }
+}
+
+/// A Nickel AST. Contains a root node and a span.
+///
+//TODO: we don't expect to access the span much on the happy path. Should we add an indirection
+//through a reference?
+#[derive(Clone, Debug, PartialEq)]
+pub struct Ast<'ast> {
+    pub node: Node<'ast>,
+    pub pos: TermPos,
+}
+
+impl<'ast> Ast<'ast> {
+    /// Sets a new position for this AST node.
+    pub fn with_pos(self, pos: TermPos) -> Self {
+        Ast { pos, ..self }
+    }
+}
+
/// A branch of a match expression.
#[derive(Debug, PartialEq, Clone)] pub struct MatchBranch<'ast> { @@ -288,6 +357,10 @@ impl AstAlloc { self.generic_arena.alloc_slice_fill_iter(iter) } + pub fn alloc_str<'ast>(&'ast self, s: &str) -> &'ast str { + self.generic_arena.alloc_str(s) + } + pub fn node<'ast>(&'ast self, node: Node<'ast>) -> &'ast Node<'ast> { self.generic_arena.alloc(node) } @@ -296,16 +369,20 @@ impl AstAlloc { Node::Number(self.number_arena.alloc(number)) } + pub fn number_move(&self, number: Number) -> &'_ Number { + self.number_arena.alloc(number) + } + pub fn string<'ast>(&'ast self, s: &str) -> Node<'ast> { Node::String(self.generic_arena.alloc_str(s)) } - pub fn str_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast> + pub fn string_chunks<'ast, I>(&'ast self, chunks: I) -> Node<'ast> where - I: IntoIterator>>, + I: IntoIterator>>, I::IntoIter: ExactSizeIterator, { - Node::StrChunks(self.generic_arena.alloc_slice_fill_iter(chunks)) + Node::StringChunks(self.generic_arena.alloc_slice_fill_iter(chunks)) } pub fn fun<'ast>(&'ast self, pat: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> { @@ -314,9 +391,23 @@ impl AstAlloc { Node::Fun { arg, body } } - pub fn let_binding<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast> + pub fn nary_fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast> + where + I: IntoIterator>, + I::IntoIter: DoubleEndedIterator, + { + args.into_iter() + .rev() + .fold(body, |body, arg| Ast { + node: self.fun(arg, body), + pos: TermPos::None, + }) + .node + } + + pub fn let_block<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast> where - I: IntoIterator, Ast<'ast>)>, + I: IntoIterator>, I::IntoIter: ExactSizeIterator, { let bindings = self.generic_arena.alloc_slice_fill_iter(bindings); @@ -329,13 +420,13 @@ impl AstAlloc { } } - pub fn app<'ast, I>(&'ast self, fun: Ast<'ast>, args: I) -> Node<'ast> + pub fn app<'ast, I>(&'ast self, head: Ast<'ast>, args: I) -> Node<'ast> where I: IntoIterator>, I::IntoIter: ExactSizeIterator, { Node::App { - fun: self.generic_arena.alloc(fun), + head: self.generic_arena.alloc(head), args: self.generic_arena.alloc_slice_fill_iter(args), } } @@ -437,14 +528,20 @@ impl AstAlloc { Node::Import(Import::Package { id }) } - /// As opposed to [Self::typ], this method takes an already constructed type and move it into - /// the arena, instead of taking each constituent separately. 
pub fn typ<'ast>(&'ast self, typ: Type<'ast>) -> Node<'ast> { Node::Type(self.generic_arena.alloc(typ)) } - pub fn typ_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> { - Node::Type(self.generic_arena.alloc(Type { typ, pos })) + pub fn type_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> { + Node::Type(self.type_move(Type { typ, pos })) + } + + pub fn type_data<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> &'ast Type<'ast> { + self.type_move(Type { typ, pos }) + } + + pub fn type_move<'ast>(&'ast self, typ: Type<'ast>) -> &'ast Type<'ast> { + self.generic_arena.alloc(typ) } pub fn types<'ast, I>(&'ast self, types: I) -> &'ast [Type<'ast>] @@ -459,10 +556,25 @@ impl AstAlloc { self.generic_arena.alloc(EnumRows(erows)) } + pub fn enum_rows_move<'ast>(&'ast self, erows: EnumRows<'ast>) -> &'ast EnumRows<'ast> { + self.generic_arena.alloc(erows) + } + pub fn record_rows<'ast>(&'ast self, rrows: RecordRowsUnr<'ast>) -> &'ast RecordRows<'ast> { self.generic_arena.alloc(RecordRows(rrows)) } + pub fn record_rows_move<'ast>(&'ast self, rrows: RecordRows<'ast>) -> &'ast RecordRows<'ast> { + self.generic_arena.alloc(rrows) + } + + pub fn record_row<'ast>(&'ast self, id: LocIdent, typ: Type<'ast>) -> &'ast RecordRow<'ast> { + self.generic_arena.alloc(RecordRow { + id, + typ: self.generic_arena.alloc(typ), + }) + } + pub fn parse_error(&self, error: ParseError) -> Node<'_> { Node::ParseError(self.error_arena.alloc(error)) } @@ -475,6 +587,14 @@ impl AstAlloc { self.generic_arena.alloc(pattern) } + pub fn patterns<'ast, I>(&'ast self, patterns: I) -> &'ast [Pattern<'ast>] + where + I: IntoIterator>, + I::IntoIter: ExactSizeIterator, + { + self.generic_arena.alloc_slice_fill_iter(patterns) + } + pub fn enum_pattern<'ast>( &'ast self, enum_pattern: EnumPattern<'ast>, @@ -489,6 +609,14 @@ impl AstAlloc { self.generic_arena.alloc(field_pat) } + pub fn field_patterns<'ast, I>(&'ast self, field_pats: I) -> &'ast [FieldPattern<'ast>] + where + I: IntoIterator>, + I::IntoIter: ExactSizeIterator, + { + self.generic_arena.alloc_slice_fill_iter(field_pats) + } + pub fn record_pattern<'ast, I>( &'ast self, patterns: I, @@ -499,10 +627,8 @@ impl AstAlloc { I: IntoIterator>, I::IntoIter: ExactSizeIterator, { - let patterns = self.generic_arena.alloc_slice_fill_iter(patterns); - self.generic_arena.alloc(RecordPattern { - patterns, + patterns: self.field_patterns(patterns), tail, pos, }) @@ -518,22 +644,13 @@ impl AstAlloc { I: IntoIterator>, I::IntoIter: ExactSizeIterator, { - let patterns = self.generic_arena.alloc_slice_fill_iter(patterns); - self.generic_arena.alloc(ArrayPattern { - patterns, + patterns: self.patterns(patterns), tail, pos, }) } - pub fn constant_pattern<'ast>( - &'ast self, - cst_pat: ConstantPattern<'ast>, - ) -> &'ast ConstantPattern<'ast> { - self.generic_arena.alloc(cst_pat) - } - pub fn or_pattern<'ast, I>(&'ast self, patterns: I, pos: TermPos) -> &'ast OrPattern<'ast> where I: IntoIterator>, diff --git a/core/src/bytecode/ast/pattern/mod.rs b/core/src/bytecode/ast/pattern/mod.rs index abd23adbc7..e682c58cf4 100644 --- a/core/src/bytecode/ast/pattern/mod.rs +++ b/core/src/bytecode/ast/pattern/mod.rs @@ -66,28 +66,6 @@ pub struct FieldPattern<'ast> { pub pos: TermPos, } -/// The last match in a data structure pattern. This can either be a normal match, or an ellipsis -/// which can capture the rest of the data structure. 
The type parameter `P` is the type of the
-/// pattern of the data structure: currently, ellipsis matches are only supported for record, but
-/// we'll probably support them for arrays as well.
-///
-/// This enum is mostly used during parsing.
-///
-/// # Example
-///
-/// - In `{foo={}, bar}`, the last match is an normal match.
-/// - In `{foo={}, bar, ..}`, the last match is a non-capturing ellipsis.
-/// - In `{foo={}, bar, ..rest}`, the last match is a capturing ellipsis.
-#[derive(Debug, PartialEq, Clone)]
-pub enum LastPattern<'ast, P> {
-    /// The last field is a normal match. In this case the pattern is "closed" so every record
-    /// fields should be matched.
-    Normal(&'ast P),
-    /// The pattern is "open" `, ..}`. Optionally you can bind a record containing the remaining
-    /// fields to an `Identifier` using the syntax `, ..y}`.
-    Ellipsis(Option<LocIdent>),
-}
-
/// A record pattern.
#[derive(Debug, PartialEq, Clone)]
pub struct RecordPattern<'ast> {
diff --git a/core/src/bytecode/ast/record.rs b/core/src/bytecode/ast/record.rs
index 7d67648d52..4b8118a2a1 100644
--- a/core/src/bytecode/ast/record.rs
+++ b/core/src/bytecode/ast/record.rs
@@ -1,4 +1,4 @@
-use super::{Annotation, Ast, AstAlloc, StrChunk};
+use super::{Annotation, Ast, AstAlloc};

use crate::{identifier::LocIdent, position::TermPos};
@@ -13,8 +13,12 @@ use std::rc::Rc;
pub enum FieldPathElem<'ast> {
    /// A statically known identifier.
    Ident(LocIdent),
-    /// A dynamic field name written as a quoted expression, e.g. `"%{protocol}" = .. `.
-    Expr(&'ast [StrChunk<Ast<'ast>>], TermPos),
+    /// A dynamic field name written as a quoted expression, e.g. `"%{protocol}" = .. `. Normally,
+    /// the expression must be a [crate::bytecode::ast::Node::StringChunks], so we could store the
+    /// chunks directly, which would be more precise. However, it's useful to keep a general
+    /// [crate::bytecode::ast::Ast] to store errors when part of the field path failed to parse
+    /// correctly.
+    Expr(Ast<'ast>),
}

impl<'ast> FieldPathElem<'ast> {
@@ -22,7 +26,7 @@ impl<'ast> FieldPathElem<'ast> {
    pub fn pos(&self) -> TermPos {
        match self {
            FieldPathElem::Ident(ident) => ident.pos,
-            FieldPathElem::Expr(_, pos) => *pos,
+            FieldPathElem::Expr(expr) => expr.pos,
        }
    }

    /// Create a path composed of a single dynamic expression.
-    pub fn single_expr_path(
-        alloc: &'ast AstAlloc,
-        expr: &'ast [StrChunk<Ast<'ast>>],
-        pos: TermPos,
-    ) -> &'ast [FieldPathElem<'ast>] {
-        alloc.alloc_iter(std::iter::once(FieldPathElem::Expr(expr, pos)))
+    pub fn single_expr_path(alloc: &'ast AstAlloc, expr: Ast<'ast>) -> &'ast [FieldPathElem<'ast>] {
+        alloc.alloc_iter(std::iter::once(FieldPathElem::Expr(expr)))
+    }
+
+    /// Try to interpret this element as a static identifier. Returns `None` if the element is an
+    /// expression with interpolation inside.
+    pub fn try_as_ident(&self) -> Option<LocIdent> {
+        match self {
+            FieldPathElem::Ident(ident) => Some(*ident),
+            FieldPathElem::Expr(expr) => {
+                expr.node.try_str_chunk_as_static_str().map(LocIdent::from)
+            }
+        }
    }
}
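
`try_as_ident` above gives later passes a uniform way to recover a static name from a quoted field definition. The expected behavior, sketched with hypothetical `elem` values:

    // `foo`, or a quoted-but-static name such as `"foo bar"`, yields an identifier:
    assert!(elem.try_as_ident().is_some());
    // A genuinely dynamic name such as `"%{protocol}"` contains interpolation,
    // so there is no static identifier to extract:
    assert!(dynamic_elem.try_as_ident().is_none());
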
@@ -62,8 +73,8 @@ impl<'ast> FieldDef<'ast> {
    /// Returns the identifier corresponding to this definition if the path is composed of exactly
    /// one element which is a static identifier. Returns `None` otherwise.
    pub fn path_as_ident(&self) -> Option<LocIdent> {
-        if let [FieldPathElem::Ident(ident)] = self.path {
-            Some(*ident)
+        if let [elem] = self.path {
+            elem.try_as_ident()
        } else {
            None
        }
    }
diff --git a/core/src/bytecode/ast/typ.rs b/core/src/bytecode/ast/typ.rs
index 2078f3d283..807064cdc5 100644
--- a/core/src/bytecode/ast/typ.rs
+++ b/core/src/bytecode/ast/typ.rs
@@ -40,3 +40,16 @@ impl<'ast> From<TypeUnr<'ast>> for Type<'ast> {
        }
    }
}
+
+impl<'ast> Type<'ast> {
+    /// Sets a new position for this type.
+    pub fn with_pos(self, pos: TermPos) -> Type<'ast> {
+        Type { pos, ..self }
+    }
+}
+
+impl<'ast> TypeUnr<'ast> {
+    pub fn spanned(self, pos: TermPos) -> Type<'ast> {
+        Type { typ: self, pos }
+    }
+}
diff --git a/core/src/cache.rs b/core/src/cache.rs
index 1d179b0e68..52f872d352 100644
--- a/core/src/cache.rs
+++ b/core/src/cache.rs
@@ -9,7 +9,7 @@ use crate::metrics::measure_runtime;
#[cfg(feature = "nix-experimental")]
use crate::nix_ffi;
use crate::package::PackageMap;
-use crate::parser::{lexer::Lexer, ErrorTolerantParser};
+use crate::parser::{lexer::Lexer, ErrorTolerantParserCompat};
use crate::position::TermPos;
use crate::program::FieldPath;
use crate::stdlib::{self as nickel_stdlib, StdlibModule};
@@ -586,7 +586,8 @@ impl Cache {
            InputFormat::Nickel => {
                let (t, parse_errs) = measure_runtime!(
                    "runtime:parse:nickel",
-                    parser::grammar::TermParser::new().parse_tolerant(file_id, Lexer::new(buf))?
+                    parser::grammar::TermParser::new()
+                        .parse_tolerant_compat(file_id, Lexer::new(buf))?
                );

                Ok((t, parse_errs))
@@ -1717,7 +1718,7 @@ pub mod resolvers {
            if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) {
                let buf = self.files.source(file_id);
                let term = parser::grammar::TermParser::new()
-                    .parse_strict(file_id, Lexer::new(buf))
+                    .parse_strict_compat(file_id, Lexer::new(buf))
                    .map_err(|e| ImportError::ParseErrors(e, *pos))?;
                e.insert(term);
                Ok((
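
The `*_compat` entry points used above parse with the new grammar but convert the result back to the mainline representation, so call sites keep receiving `RichTerm`s. A sketch of the call-site pattern (assuming `file_id` and `source` are in scope and the error type converts with `?`):

    use nickel_lang_core::parser::{grammar, lexer::Lexer, ErrorTolerantParserCompat};

    let rich_term = grammar::TermParser::new()
        .parse_strict_compat(file_id, Lexer::new(source))?; // mainline RichTerm
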
diff --git a/core/src/combine.rs b/core/src/combine.rs
index 45950e4d86..f508089b79 100644
--- a/core/src/combine.rs
+++ b/core/src/combine.rs
@@ -1,6 +1,8 @@
//! Module for the Combine trait
//!
-//! Defines the `Combine` trait.
+//! Defines `Combine` traits.
+
+use crate::bytecode::ast::AstAlloc;

/// Trait for structures representing a series of annotation that can be combined (flattened).
/// Pedantically, `Combine` is just a monoid.
@@ -8,3 +10,31 @@ pub trait Combine: Default {
    /// Combine two elements.
    fn combine(left: Self, right: Self) -> Self;
}
+
+/// [Combine] doesn't work for the new AST nodes, which require an external allocator to create
+/// new nodes. This trait is a version that takes this additional allocator. It's temporary: I
+/// suspect we won't need the original general [Combine] trait once we move to the bytecode VM, as
+/// [Combine] is used mostly on ast-like data, and we will rename [CombineAlloc] to [Combine].
+pub trait CombineAlloc<'ast> {
+    fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self;
+}
+
+impl<T: Combine> Combine for Option<T> {
+    fn combine(left: Self, right: Self) -> Self {
+        match (left, right) {
+            (None, None) => None,
+            (None, Some(x)) | (Some(x), None) => Some(x),
+            (Some(left), Some(right)) => Some(Combine::combine(left, right)),
+        }
+    }
+}
+
+impl<'ast, T: CombineAlloc<'ast>> CombineAlloc<'ast> for Option<T> {
+    fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self {
+        match (left, right) {
+            (None, None) => None,
+            (None, Some(x)) | (Some(x), None) => Some(x),
+            (Some(left), Some(right)) => Some(CombineAlloc::combine(alloc, left, right)),
+        }
+    }
+}
diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs
index bca92750c9..7d7cacf686 100644
--- a/core/src/error/mod.rs
+++ b/core/src/error/mod.rs
@@ -1733,7 +1733,7 @@ mod blame_error {
    /// and calls `ty_path::span`. This new type is guaranteed to have all of its positions set,
    /// providing a definite `PathSpan`. This is similar to the behavior of [`super::primary_alt`].
    pub fn path_span(files: &mut Files, path: &[ty_path::Elem], ty: &Type) -> PathSpan {
-        use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParser};
+        use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParserCompat};

        ty_path::span(path.iter().peekable(), ty)
            .or_else(|| {
@@ -1741,7 +1741,7 @@ mod blame_error {
                let file_id = files.add(super::UNKNOWN_SOURCE_NAME, type_pprinted.clone());

                let ty_with_pos = FixedTypeParser::new()
-                .parse_strict(file_id, Lexer::new(&type_pprinted))
+                .parse_strict_compat(file_id, Lexer::new(&type_pprinted))
                .unwrap();

                ty_path::span(path.iter().peekable(), &ty_with_pos)
diff --git a/core/src/eval/merge.rs b/core/src/eval/merge.rs
index d2336015c5..a461ad0ab3 100644
--- a/core/src/eval/merge.rs
+++ b/core/src/eval/merge.rs
@@ -456,6 +456,8 @@ fn merge_fields<'a, C: Cache, I: DoubleEndedIterator + Clon
/// This function is parametrized temporarily to accomodate both the mainline Nickel AST
/// ([crate::term::Term]) where documentation is represented as a `String`, and the new bytecode
/// AST where documentation is represented as an `Rc<str>`.
+//FIXME: remove the type parameter `D` once we've moved evaluation to the new bytecode VM.
+//Currently we need to handle both the old representation `D=String` and the new one `D=Rc<str>`.
pub(crate) fn merge_doc<D>(doc1: Option<D>, doc2: Option<D>) -> Option<D> {
    //FIXME: how to merge documentation? Just concatenate?
    doc1.or(doc2)
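
A sketch of what an implementation of the new `CombineAlloc` trait looks like: a hypothetical instance for `Annotation`, whose contract list is an arena-allocated slice (the PR's real implementations may differ):

    impl<'ast> CombineAlloc<'ast> for Annotation<'ast> {
        fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self {
            // Contracts live in the arena, so combining two slices requires
            // the allocator to build a fresh one.
            let contracts: Vec<_> = left
                .contracts
                .iter()
                .chain(right.contracts.iter())
                .cloned()
                .collect();

            Annotation {
                typ: left.typ.or(right.typ),
                contracts: alloc.types(contracts),
            }
        }
    }
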
diff --git a/core/src/eval/tests.rs b/core/src/eval/tests.rs
index 412ef1ff8f..49e0b80ded 100644
--- a/core/src/eval/tests.rs
+++ b/core/src/eval/tests.rs
@@ -4,7 +4,7 @@ use crate::cache::resolvers::{DummyResolver, SimpleResolver};
use crate::error::{ImportError, NullReporter};
use crate::files::Files;
use crate::label::Label;
-use crate::parser::{grammar, lexer, ErrorTolerantParser};
+use crate::parser::{grammar, lexer, ErrorTolerantParserCompat};
use crate::term::make as mk_term;
use crate::term::Number;
use crate::term::{BinaryOp, StrChunk, UnaryOp};
@@ -30,7 +30,7 @@ fn parse(s: &str) -> Option<RichTerm> {
    let id = Files::new().add("<test>", String::from(s));

    grammar::TermParser::new()
-        .parse_strict(id, lexer::Lexer::new(s))
+        .parse_strict_compat(id, lexer::Lexer::new(s))
        .map(RichTerm::without_pos)
        .map_err(|err| println!("{err:?}"))
        .ok()
diff --git a/core/src/identifier.rs b/core/src/identifier.rs
index c44a3b74c7..84df011d7d 100644
--- a/core/src/identifier.rs
+++ b/core/src/identifier.rs
@@ -41,6 +41,15 @@ impl Ident {
        increment!("Ident::fresh");
        Self::new(format!("{}{}", GEN_PREFIX, GeneratedCounter::next()))
    }
+
+    /// Attaches a position to this identifier, making it a `LocIdent`.
+    pub fn spanned(self, pos: TermPos) -> LocIdent {
+        LocIdent {
+            ident: self,
+            pos,
+            generated: self.label().starts_with(GEN_PREFIX),
+        }
+    }
}

impl fmt::Display for Ident {
@@ -57,11 +66,7 @@ impl fmt::Debug for Ident {

impl From<Ident> for LocIdent {
    fn from(ident: Ident) -> Self {
-        LocIdent {
-            ident,
-            pos: TermPos::None,
-            generated: ident.label().starts_with(GEN_PREFIX),
-        }
+        ident.spanned(TermPos::None)
    }
}
diff --git a/core/src/lib.rs b/core/src/lib.rs
index dc38f338f6..59031df64a 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -1,4 +1,3 @@
-#[cfg(feature = "bytecode-experimental")]
pub mod bytecode;
pub mod cache;
pub mod closurize;
diff --git a/core/src/parser/error.rs b/core/src/parser/error.rs
index 2a05bbc111..834d9844e5 100644
--- a/core/src/parser/error.rs
+++ b/core/src/parser/error.rs
@@ -21,10 +21,10 @@ pub enum LexicalError {
    Generic(Range<usize>),
}

-/// Error indicating that a construct is not allowed when trying to interpret an `UniRecord` as a
+/// Error indicating that a construct is not allowed when trying to interpret a `UniRecord` as a
/// record type in a strict way.
///
-/// See [`UniRecord::into_type_strict`](crate::parser::uniterm::UniRecord::into_type_strict).
+/// See `parser::uniterm::UniRecord::into_type_strict`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum InvalidRecordTypeError {
    /// The record type had an invalid field, for example because it had a contract,
diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop
index d8a9495a69..c093b25a71 100644
--- a/core/src/parser/grammar.lalrpop
+++ b/core/src/parser/grammar.lalrpop
@@ -17,9 +17,10 @@
//! This is not the case of all rules. Record literals and variables can both be
//! interpreted in a different way, depending on how their usage. In
//! `x : {foo : Num}`, `{foo : Num}` is interpreted as a record type. In `{foo :
-//! Num}.foo`, it is a record literal with a missing definition for `foo`. The
-//! first interpretation is **not** equivalent to first interpreting it as a
-//! term, and then as a type.
+//! Num}.foo`, it is a record literal with a missing definition for `foo` (note:
+//! this latter form is now forbidden in the syntax). The first interpretation
+//! is **not** equivalent to first interpreting it as a
+//! term, and then as a type.
//!
//!
For those reasons, the `uniterm` module introduces a new AST definition, that //! just wraps `RichTerm` and `Type`, together with dedicated variants for the @@ -31,12 +32,14 @@ //! //! In consequence, this grammar uses three main types `RichTerm`, `Type` and //! `UniTerm`, as well as conversion macros `AsTerm`, `AsType` and `AsUniTerm`. -//! Some rules that are known to only produce `RichTerm` or `Type` may have the +//! +//! Rules that are known to only produce `RichTerm` or `Type` may have the //! corresponding more precise return type. Other rules that produce or just //! propagate general uniterms have to return a `UniTerm`. use std::{ ffi::OsString, convert::TryFrom, + iter, }; use lalrpop_util::ErrorRecovery; @@ -51,84 +54,126 @@ use super::{ use crate::{ files::FileId, - mk_app, - mk_opn, - mk_fun, - identifier::LocIdent, - term::{ + identifier::{Ident, LocIdent}, + combine::CombineAlloc, + bytecode::ast::{ *, - record::{RecordAttrs, Field, FieldMetadata}, - array::Array, - make as mk_term, + record::{FieldMetadata, FieldDef, FieldPathElem}, pattern::*, + typ::*, + primop::{PrimOp, RecordOpKind}, }, - typ::*, + typ::{VarKind, DictTypeFlavour}, position::{TermPos, RawSpan}, - label::Label, - combine::Combine, + app, + primop_app, + fun, + label::MergeKind, }; use malachite::num::basic::traits::Zero; -grammar<'input, 'err, 'wcard>( +grammar<'input, 'ast, 'err, 'wcard>( + alloc: &'ast AstAlloc, src_id: FileId, errors: &'err mut Vec, ParseError>>, next_wildcard_id: &'wcard mut usize, ); -WithPos: Rule = => t.with_pos(mk_pos(src_id, l, r)); +// Takes a rule producing a `Node` and automatically attach a position to make it +// an `Ast`. +Spanned: Ast<'ast> = => + node.spanned(mk_pos(src_id, left, right)); + +// Takes a rule producing a `Node` and automatically attach a position to make it +// an `Ast`. +SpannedId: LocIdent = => + id.spanned(mk_pos(src_id, left, right)); + +// Takes a rule producing a `Node` and automatically attach a position to make it +// an `Ast`. +SpannedTy: Type<'ast> = => + ty.spanned(mk_pos(src_id, left, right)); + +// Takes a rule producing a spanned value with a `with_pos` method (can be an +// `Ast`, but not only) and re-assigns the position to the span of the rule. +WithPos: Rule = => + t.with_pos(mk_pos(src_id, left, right)); -AsTerm: RichTerm = > =>? - RichTerm::try_from(ut) +AsTerm: Ast<'ast> = > =>? + Ast::try_convert(alloc, ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); -AsType: Type = > =>? - Type::try_from(ut) +AsType: Type<'ast> = > =>? + Type::try_convert(alloc, ut) .map_err(|e| lalrpop_util::ParseError::User{error: e}); -AsUniTerm: UniTerm = > => UniTerm::from(ut); +// Repeat a rule zero times or more with a separator interspersed, such that the last +// separator is optional: for example, Delimiter will both accept +// `1,2` and `1,2,`. +RepeatSep: Vec = Sep)*> => { + elems.extend(last); + elems +}; + +// Same as `RepeatSep`, but repeat the rule at least once (one or more), instead +// of zero or more. +RepeatSep1: Vec = Sep)*> Sep? => { + elems.push(last); + elems +}; -AnnotSeries: AnnotAtom = => - <>.into_iter().fold(Default::default(), Combine::combine); + +AsUniTerm: UniTerm<'ast> = > => UniTerm::from(ut); + +// Macro repeating a rule producing some form of annotation (that can be +// repeated and combined, typically field metadata). +AnnotSeries: AnnotAtom = => { + <> + .into_iter() + .fold(Default::default(), |acc, next| CombineAlloc::combine(alloc, acc, next)) +}; // A single type or contract annotation. 
The `Type` rule forbids the use of // constructs that can themselves have annotation on the right, such as a `let`. -// Otherwise, `foo | let x = 1 in x : Num` is ambiguous (the annotation could be -// either `foo | (let x = 1 in (x : Num))` or `(foo | let x = 1 in x) : Num`). +// Otherwise, `foo | let x = 1 in x : Number` is ambiguous (the annotation could +// be either `foo | (let x = 1 in (x : Number))` or `(foo | let x = 1 in x) : +// Number`). // // The rule to use for type annotations is given as a parameter. We always use a // rule that is syntactically equivalent to the `Type` rule. The parameter is // here to control if the type should have its variables fixed now (`FixedType`) // or later (bare `Type`). Almost all rules are of the former kind, and use // `FixedType` (see `FixedType` and `parser::utils::fix_type_vars`). -AnnotAtom: TypeAnnotation = { - "|" => TypeAnnotation { - contracts: vec![LabeledType {typ: ty.clone(), label: mk_label(ty, src_id, l, r)}], +AnnotAtom: Annotation<'ast> = { + "|" => Annotation { + contracts: alloc.types(iter::once(<>)), ..Default::default() }, - ":" => TypeAnnotation { - typ: Some(LabeledType {typ: ty.clone(), label: mk_label(ty, src_id, l, r)}), + ":" => Annotation { + typ: Some(<>), ..Default::default() }, }; // A single metadata annotation attached to a let-binding. Compared to // annotations which can appear everywhere (`AnnotAtom`, either a type or a -// contract annotation), let annotations also include documentation (`doc`). -LetAnnotAtom: LetMetadata = { - > => <>.into(), - "|" "doc" => LetMetadata { - doc: Some(s), +// contract annotation), let annotations also include documentation (`doc`). As +// opposed to record fields, they can't express priority, optionality, etc. +LetAnnotAtom: LetMetadata<'ast> = { + AnnotAtom => LetMetadata { + annotation: <>, + ..Default::default() + }, + "|" "doc" => LetMetadata { + doc: Some(<>.into()), ..Default::default() }, } -// A single field metadata annotation, without the pseudo-metadata (such as -// recursive priorities). -// -// The rule to use for type annotations is given as a parameter (cf AnnotAtom -// rule). -SimpleFieldAnnotAtom: FieldMetadata = { +// A single field metadata annotation. The rule to use for type annotations is +// given as a parameter (cf AnnotAtom rule). +FieldAnnotAtom: FieldMetadata<'ast> = { > => <>.into(), "|" "default" => FieldMetadata { priority: MergePriority::Bottom, @@ -152,16 +197,21 @@ SimpleFieldAnnotAtom: FieldMetadata = { }, } -// A single field metadata annotation. -// -// The rule to use for type annotations is given as a parameter (cf AnnotAtom -// rule). -FieldAnnotAtom: FieldExtAnnot = { - > => <>.into(), // Recursive priorities are disabled as of 1.2.0. Their semantics is non trivial // to adapt to RFC005 that landed in 1.0.0, so they are currently on hold. If we // drop them altogether, we'll have to clean the corresponding code floating // around (not only in the parser, but in the internals module, etc.) +// +// The current `FieldAnnot` was named `SimpleFieldAnnot` before commenting this +// part out. If we restore recursive priorities, we might probably revert to the +// old naming. +// +// // A single field metadata annotation. +// +// // The rule to use for type annotations is given as a parameter (cf AnnotAtom +// // rule). 
+//FieldAnnotAtom: FieldExtAnnot = { +// > => <>.into(), // "|" "rec" "force" => FieldExtAnnot { // rec_force: true, // ..Default::default() @@ -170,209 +220,215 @@ FieldAnnotAtom: FieldExtAnnot = { // rec_default: true, // ..Default::default() // }, -} +//} // An annotation, with possibly many annotations chained. -Annot: TypeAnnotation = AnnotSeries>>; +Annot: Annotation<'ast> = AnnotSeries>>; // A let annotation, with possibly many annotations chained. Include type // annotations, contract annotations and doc annotations. -LetAnnot: LetMetadata = AnnotSeries>>; - -// A simple field annotation, with possibly many annotations chained. A simple -// field annotation excludes pseudo metadata like recursive priorities operator. -SimpleFieldAnnot: FieldMetadata = AnnotSeries>; +LetAnnot: LetMetadata<'ast> = AnnotSeries>>; // A field annotation, with possibly many annotations chained. -FieldAnnot: FieldExtAnnot = +FieldAnnot: FieldMetadata<'ast> = AnnotSeries>>; -// A general term. Wrap the root of the grammar as a `RichTerm`. -pub Term: RichTerm = AsTerm; +// A general expression. Wrap the root of the grammar as an `Ast`. +pub Term: Ast<'ast> = AsTerm; // A general type. Chosen such that it can't have top-level annotations. // (see `AnnotAtom`) -Type: Type = { +Type: Type<'ast> = { AsType, - Forall, + SpannedTy, }; // A type with type variables fixed. See `parser::utils::fix_type_vars`. // // This rule is public and can be used from external modules to parse an input // directly as a type. -pub FixedType: Type = { - =>? { - ty.fix_type_vars(mk_span(src_id, l, r))?; - Ok(ty) +pub FixedType: Type<'ast> = { + =>? { + Ok(ty.fix_type_vars(alloc, mk_span(src_id, l, r))?) } }; -// Either a term or a top-level let-binding (a let-binding without an `in`). -// Used exclusively for the REPL. -pub ExtendedTerm: ExtendedTerm = { - "let" ?> "=" => { +// Either an expression or a top-level let-binding (a let-binding without an +// `in`). Used exclusively for the REPL. +pub ExtendedTerm: ExtendedTerm> = { + "let" ?> "=" => { if let Some(ann) = ann { - t = ann.annotation.attach_term(t); + exp = ann.annotation.attach_to_ast(alloc, exp); } - ExtendedTerm::ToplevelLet(id, t) + ExtendedTerm::ToplevelLet(id, exp) }, - Term => ExtendedTerm::RichTerm(<>), + Term => ExtendedTerm::Term(<>), }; -LetBinding: LetBinding = { - ?> "=" => { - LetBinding { pattern, annot, value } +LetBinding: LetBinding<'ast> = { + ?> "=" => { + LetBinding { pattern, metadata: metadata.unwrap_or_default(), value } } } // A general uniterm. The root of the grammar. -UniTerm: UniTerm = { +UniTerm: UniTerm<'ast> = { InfixExpr, AnnotatedInfixExpr, - AsUniTerm, - "let" - ",")*> ","? - "in" =>? { - bindings.push(last); - Ok(UniTerm::from(mk_let(recursive.is_some(), bindings, body)?)) + AsUniTerm>, + "let" + + > + "in" =>? { + Ok(UniTerm::from(mk_let( + alloc, + recursive.is_some(), + bindings, + body, + )?)) }, - "fun" "=>" => { + "fun" "=>" => { let pos = mk_pos(src_id, l, r); - let rt = pats.into_iter().rev().fold(t, |t, assgn| RichTerm { - term: SharedTerm::new(mk_fun(assgn, t)), - pos, - }); - UniTerm::from(rt) - }, - "if" "then" "else" => - UniTerm::from(mk_app!(Term::Op1(UnaryOp::IfThenElse, cond), t1, t2)), - => { - UniTerm::from(err) + let expr = pats + .into_iter() + .rev() + .fold(body, |built, next_arg| + alloc.fun(next_arg, built).spanned(pos) + ); + + UniTerm::from(expr) }, + "if" "then" "else" => + UniTerm::from(alloc.if_then_else(cond, e1, e2)), + => UniTerm::from(err), "import" =>? 
{ - Ok(UniTerm::from(mk_import_based_on_filename(s, mk_span(src_id, l, r))?)) + Ok(UniTerm::from(mk_import_based_on_filename(alloc, s, mk_span(src_id, l, r))?)) }, "import" "as" =>? { - Ok(UniTerm::from(mk_import_explicit(s, t, mk_span(src_id, l, r))?)) + Ok(UniTerm::from(mk_import_explicit(alloc, s, t, mk_span(src_id, l, r))?)) }, "import" => { - UniTerm::from(Term::Import(Import::Package { id: pkg.ident() })) + UniTerm::from(alloc.import_package(pkg.ident())) } }; -AnnotatedInfixExpr: UniTerm = { - > > => { - UniTerm::from(ann.attach_term(t)) +AnnotatedInfixExpr: UniTerm<'ast> = { + > > => { + UniTerm::from(ann.attach_to_ast(alloc, e)) }, }; -Forall: Type = - "forall" "." > => { +Forall: TypeUnr<'ast> = + "forall" "." => { ids.into_iter().rev().fold( ty, // The variable kind will be determined during the `fix_type_vars` - // phase. For now, we put a random one (which is also the default - // one, for unused type variables) + // phase. For now, we put an arbitrary one (which is also the + // default one for unused type variables) |acc, var| { let pos = acc.pos; + Type { typ: TypeF::Forall { var, var_kind: VarKind::Type, - body: Box::new(acc) + body: alloc.alloc(acc), }, pos } } - ) + ).typ }; +// The possible heads of function application. The head of a multi-argument +// application is the leftmost part in ` ... `. +ApplicativeHead: UniTerm<'ast> = { + Atom, + AsUniTerm>, + > => UniTerm::from(primop_app!(alloc, op, t)), + > > + => UniTerm::from(primop_app!(alloc, op, t1, t2)), + NOpPre>, + "match" "{" > "}" => UniTerm::from(alloc.match_expr(branches)), +}; + // A n-ary application-like expression (n may be 0, in the sense that this rule // also includes previous levels). -Applicative: UniTerm = { - Atom, - AsUniTerm>, - > > => { - // We special case the application of an enum tag here. In principle, an - // enum variant applied to an argument is of different nature than a - // function application. However, for convenience, we made the syntax - // the same. So we now have to detect cases like `'Foo {x=1}` and - // convert that to a proper enum variant. - let term = if let Term::Enum(tag) = t1.as_ref() { - Term::EnumVariant { - tag: *tag, - arg: t2, - attrs: EnumVariantAttrs::default(), - } +Applicative: UniTerm<'ast> = { + > *> =>? { + // A zero-ary application is just the head. + if args.is_empty() { + Ok(head) } else { - Term::App(t1, t2) - }; - - UniTerm::from(term) + // For a general application, we need the head to be a term. We + // don't support general type applications yet - `Array T` is + // special cased as a type constructor. + let head = Ast::try_convert(alloc, head) .map_err(|e| lalrpop_util::ParseError::User{error: e})?; + + // We special case the application of an enum tag here. In principle, an + // enum variant applied to an argument is of different nature than a + // function application. However, for convenience, we made the syntax + // the same. So we now have to detect cases like `'Foo {x=1}` and + // convert that to a proper enum variant. + if let (Node::EnumVariant { ref tag, arg: None }, 1) + = (&head.node, args.len()) { + Ok(alloc.enum_variant(*tag, args.pop()).into()) + } + else { + Ok(alloc.app(head, args).into()) + } + } }, - > => UniTerm::from(mk_term::op1(op, t)), - > > - => UniTerm::from(mk_term::op2(op, t1, t2)), - NOpPre>, - "match" "{" "}" => { - let branches = branches - .into_iter() - .map(|(branch, _comma)| branch) - .chain(last) - .collect(); - - UniTerm::from(Term::Match(MatchData { branches })) - } }; // The parametrized array type. 
-TypeArray: Type = "Array" > => +TypeArray: TypeUnr<'ast> = "Array" > => // For some reason, we have to bind the type into a `t` // rather than using the usual `<>` placeholder, otherwise, // it doesn't compile. - Type::from(TypeF::Array(Box::new(t))); + TypeF::Array(alloc.alloc(t)); // A record operation chain, such as `{foo = data}.bar.baz`. -RecordOperationChain: RichTerm = { - > "." => mk_term::op1(UnaryOp::RecordAccess(id), t).with_pos(id.pos), - > "." > => mk_access(t_id, t), +RecordOperationChain: Node<'ast> = { + > "." => + alloc.prim_op(PrimOp::RecordStatAccess(id), iter::once(e)), + > "." > => mk_access(alloc, t_id, e), }; -RecordRowTail: RecordRows = { +RecordRowTail: RecordRows<'ast> = { => RecordRows(RecordRowsF::TailVar(<>)), "Dyn" => RecordRows(RecordRowsF::TailDyn), }; // A record, that can be later interpreted either as a record literal or as a // record type. -UniRecord: UniRecord = { +UniRecord: UniRecord<'ast> = { "{" ",")*> "}" => { - let (last_field, attrs) = match last { + let (last_field, open) = match last { Some(RecordLastField::Field(f)) => (Some(f), Default::default()), - Some(RecordLastField::Ellipsis) => - (None, RecordAttrs { open: true, ..Default::default() }), + Some(RecordLastField::Ellipsis) => (None, true), None => (None, Default::default()) }; - let pos_ellipsis = if attrs.open { - mk_pos(src_id, last_l, last_r) - } - else { - TermPos::None - }; + let pos_ellipsis = if open { + mk_pos(src_id, last_l, last_r) + } + else { + TermPos::None + }; let fields : Vec<_> = fields.into_iter().chain(last_field.into_iter()).collect(); + UniRecord { fields, tail: tail.map(|t| (t.1, mk_pos(src_id, tail_l, tail_r))), - attrs, + open, pos: TermPos::None, pos_ellipsis, } @@ -386,90 +442,67 @@ NumberLiteral: Number = { <"bin num literal">, }; -Atom: UniTerm = { - "(" > ")", +Atom: UniTerm<'ast> = { + "(" >> ")", "(" ")", - NumberLiteral => UniTerm::from(Term::Num(<>)), - "null" => UniTerm::from(Term::Null), - Bool => UniTerm::from(Term::Bool(<>)), - AsUniTerm, + NumberLiteral => UniTerm::from(alloc.number(<>)), + "null" => UniTerm::from(Node::Null), + Bool => UniTerm::from(Node::Bool(<>)), + AsUniTerm>, Ident => UniTerm::from(UniTermNode::Var(<>)), WithPos => UniTerm::from(UniTermNode::Record(<>)), - => UniTerm::from(Term::Enum(<>)), - "[" ",")*> "]" => { - let terms = terms - .into_iter() - .chain(last.into_iter()) - .collect(); - - UniTerm::from(Term::Array(terms, Default::default())) - }, - AsUniTerm>, - AsUniTerm, + EnumTag => UniTerm::from(Node::EnumVariant { tag: <>, arg: None }), + "[" > "]" => UniTerm::from(alloc.array(<>)), + AsUniTerm>, + AsUniTerm>, }; // A record field definition. The is the only place where we don't fix the type // variables inside the annotation right away (note the `Annot` instead // of `Annot`). -RecordField: FieldDef = { - ?> Field { value, ..Default::default() } - }; - +RecordField: FieldDef<'ast> = { + + + ?> + RecordLastField::Ellipsis, }; // A field path syntax in a field definition, as in `{foo."bar bar".baz = "value"}`. -FieldPath: FieldPath = { +FieldPath: Vec> = { ".")*> => { elems.push(last); elems @@ -478,17 +511,17 @@ FieldPath: FieldPath = { // A field path which only contains static string literals, that is, without any // interpolated expression in it. -pub StaticFieldPath: Vec = =>? { +pub StaticFieldPath: Vec = =>? 
{ field_path .into_iter() .map(|elem| match elem { FieldPathElem::Ident(ident) => Ok(ident), FieldPathElem::Expr(expr) => { - let as_string = expr.as_ref().try_str_chunk_as_static_str().ok_or( + let as_string = expr.node.try_str_chunk_as_static_str().ok_or( ParseError::InterpolationInStaticPath { path_elem_span: expr.pos .into_opt() - .unwrap_or_else(|| mk_span(src_id, start, end)), + .unwrap_or_else(|| mk_span(src_id, l, r)), }, )?; Ok(LocIdent::new_with_pos(as_string, expr.pos)) @@ -501,32 +534,31 @@ pub StaticFieldPath: Vec = rule produces a -// RichTerm anyway, so it's simpler to just return it instead of artificially -// deconstructing it. +// We could just return a `Node` instead of a `Ast`, as position information is +// already stored in the span. But the rule produces an Ast anyway, so +// it's simpler to just return it instead of artificially deconstructing it. // // This rule is currently only used for the CLI and isn't part of the grammar // for normal Nickel source code. -pub CliFieldAssignment: (Vec, RichTerm, RawSpan) = - "=" > +pub CliFieldAssignment: (Vec, Ast<'ast>, RawSpan) = + "=" => (path, value, mk_span(src_id, start, end)); -FieldPathElem: FieldPathElem = { +FieldPathElem: FieldPathElem<'ast> = { => FieldPathElem::Ident(<>), - > => FieldPathElem::Expr(<>), + > => FieldPathElem::Expr(<>), }; // A pattern. @@ -557,7 +589,7 @@ FieldPathElem: FieldPathElem = { // always interpreted as `fun ('Foo) ('Bar) => ...`. The other interpretation // can be written as `fun ('Foo 'Bar) => ...`. // -// We allow parenthesized enum variants pattern in general pattern as well, not +// We allow parenthesized enum variant patterns in general patterns as well, not // only for consistency, but because they also make nested enum variant patterns // more readable: `'Foo ('Bar 5)` vs `'Foo 'Bar 5`. In fact, we also force // nested enum patterns to be parenthesized, and forbid the latter, for better @@ -615,9 +647,9 @@ FieldPathElem: FieldPathElem = { // we can ensure there's only one way to parse each and every combination with // only one look-ahead, thus satisfying the LR(1). #[inline] -PatternF: Pattern = { +PatternF: Pattern<'ast> = { - > "@")?> + "@")?> > => { Pattern { @@ -629,24 +661,24 @@ PatternF: Pattern = { }; #[inline] -PatternDataF: PatternData = { - RecordPattern => PatternData::Record(<>), - ArrayPattern => PatternData::Array(<>), - ConstantPattern => PatternData::Constant(<>), - EnumRule => PatternData::Enum(<>), - OrRule => PatternData::Or(<>), +PatternDataF: PatternData<'ast> = { + RecordPattern => PatternData::Record(alloc.alloc(<>)), + ArrayPattern => PatternData::Array(alloc.alloc(<>)), + ConstantPattern => PatternData::Constant(alloc.alloc(<>)), + EnumRule => PatternData::Enum(alloc.alloc(<>)), + OrRule => PatternData::Or(alloc.alloc(<>)), IdentRule => PatternData::Any(<>), "_" => PatternData::Wildcard, }; // A general pattern, unrestricted. #[inline] -Pattern: Pattern = PatternF; +Pattern: Pattern<'ast> = PatternF; // A pattern restricted to function arguments, which requires or-patterns and // enum variant patterns to be parenthesized at the top-level. #[inline] -PatternFun: Pattern = PatternF; +PatternFun: Pattern<'ast> = PatternF; // A pattern that can be used within a branch of an or-pattern. To avoid a // shift-reduce conflicts (because we want to allow `or` to remain a valid @@ -663,7 +695,7 @@ PatternFun: Pattern = PatternF; // // See the `PatternF` rule for an explanation of why we need those restrictions. 
#[inline] -PatternOrBranch: Pattern = +PatternOrBranch: Pattern<'ast> = > => { @@ -674,28 +706,28 @@ PatternOrBranch: Pattern = } }; -ConstantPattern: ConstantPattern = { +ConstantPattern: ConstantPattern<'ast> = { => ConstantPattern { data, pos: mk_pos(src_id, start, end) } }; -ConstantPatternData: ConstantPatternData = { +ConstantPatternData: ConstantPatternData<'ast> = { Bool => ConstantPatternData::Bool(<>), - NumberLiteral => ConstantPatternData::Number(<>), + NumberLiteral => ConstantPatternData::Number(alloc.number_move(<>)), // We could accept multiline strings here, but it's unlikely that this will // result in very readable match expressions. For now we restrict ourselves // to standard string; we can always extend to multiline later if needed - StandardStaticString => ConstantPatternData::String(<>.into()), + StandardStaticString => ConstantPatternData::String(alloc.alloc_str(&<>)), "null" => ConstantPatternData::Null, }; -RecordPattern: RecordPattern = { +RecordPattern: RecordPattern<'ast> = { "{" ",")*> "}" =>? { let tail = match last { Some(LastPattern::Normal(m)) => { - field_pats.push(*m); + field_pats.push(m); TailPattern::Empty }, Some(LastPattern::Ellipsis(Some(captured))) => { @@ -708,21 +740,21 @@ RecordPattern: RecordPattern = { }; let pattern = RecordPattern { - patterns: field_pats, + patterns: alloc.field_patterns(field_pats), tail, pos: mk_pos(src_id, start, end) }; - pattern.check_dup()?; + pattern.check_dup()?; Ok(pattern) - }, + } }; -ArrayPattern: ArrayPattern = { +ArrayPattern: ArrayPattern<'ast> = { "[" ",")*> "]" => { let tail = match last { Some(LastPattern::Normal(m)) => { - patterns.push(*m); + patterns.push(m); TailPattern::Empty }, Some(LastPattern::Ellipsis(Some(captured))) => { @@ -734,18 +766,16 @@ ArrayPattern: ArrayPattern = { None => TailPattern::Empty, }; - let pattern = ArrayPattern{ - patterns, + ArrayPattern { + patterns: alloc.patterns(patterns), tail, pos: mk_pos(src_id, start, end) - }; - - pattern + } }, }; // A pattern for an enum tag (without argument). -EnumTagPattern: EnumPattern = => EnumPattern { +EnumTagPattern: EnumPattern<'ast> = => EnumPattern { tag, pattern: None, pos: mk_pos(src_id, start, end), @@ -753,33 +783,33 @@ EnumTagPattern: EnumPattern = => EnumPatter // A rule which only matches an enum variant pattern of the form `' or`. // Used to disambiguate between an enum variant pattern and an or-pattern. -EnumVariantOrPattern: EnumPattern = +EnumVariantOrPattern: EnumPattern<'ast> = - > + > => { let pos_or = or_arg.pos; EnumPattern { tag, - pattern: Some(Box::new(Pattern { + pattern: Some(Pattern { data: PatternData::Any(or_arg), alias: None, pos: pos_or, - })), + }), pos: mk_pos(src_id, start, end), } }; // An enum variant pattern, excluding the `EnumVariantPatternOr` case: that is, // this rule doesn't match the case `' or`. -EnumVariantNoOrPattern: EnumPattern = +EnumVariantNoOrPattern: EnumPattern<'ast> = - >> + >> => EnumPattern { tag, - pattern: Some(Box::new(pattern)), + pattern: Some(pattern), pos: mk_pos(src_id, start, end), }; @@ -791,7 +821,7 @@ EnumVariantNoOrPattern: EnumPattern = // or-pattern, as in `'Foo or 'Bar`; but as long as we parse this common // prefix using the same rule and only disambiguate later, there is no // shift/reduce conflict. -EnumVariantPattern: EnumPattern = { +EnumVariantPattern: EnumPattern<'ast> = { EnumVariantOrPattern, EnumVariantNoOrPattern, }; @@ -800,7 +830,7 @@ EnumVariantPattern: EnumPattern = { // or-pattern. 
As we parse `EnumVariantOrPattern` and treat it specifically in // an `or` branch (`OrPatternBranch`), we need to remove it from the enum // pattern rule. -EnumPatternOrBranch: EnumPattern = { +EnumPatternOrBranch: EnumPattern<'ast> = { EnumVariantNoOrPattern, // Only a top-level un-parenthesized enum variant pattern can be ambiguous. // If it's parenthesized, we allow the general version including the "or" @@ -811,7 +841,7 @@ EnumPatternOrBranch: EnumPattern = { // An unparenthesized enum pattern (including both enum tags and enum // variants). -EnumPatternUnparens: EnumPattern = { +EnumPatternUnparens: EnumPattern<'ast> = { EnumTagPattern, EnumVariantPattern, }; @@ -819,14 +849,14 @@ EnumPatternUnparens: EnumPattern = { // A parenthesized enum pattern, including both tags and variants (note that an // enum tag alone is never parenthesized: parentheses only apply to enum // variant patterns). -EnumPatternParens: EnumPattern = { +EnumPatternParens: EnumPattern<'ast> = { EnumTagPattern, "(" ")", } // The unrestricted rule for enum patterns. Allows both enum tags and enum // variants, and both parenthesized and un-parenthesized enum variants. -EnumPattern: EnumPattern = { +EnumPattern: EnumPattern<'ast> = { EnumTagPattern, EnumVariantPattern, "(" ")" @@ -835,57 +865,60 @@ EnumPattern: EnumPattern = { // An individual element of an or-pattern, plus a trailing "or". This rule is a // bit artificial, and is essentially here to dispel the shift/reduce conflict // around `'Foo or`/`'Foo or 'Bar` explained in the description of `PatternF`. -OrPatternBranch: Pattern = { +OrPatternBranch: Pattern<'ast> = { // To avoid various shift-reduce conflicts, the patterns used within an // `or`-branch have several restrictions. See the `PatternOrBranch` rule. "or", // A variant pattern of the form `' or`. The trick is to instead // consider it as the enum tag pattern `'` followed by the `or` // contextual keyword after-the-fact. - => { - let pos = pat.pos; + EnumVariantOrPattern => { + let pos = <>.pos; Pattern { pos, alias: None, - data: PatternData::Enum(EnumPattern { - tag: pat.tag, + data: PatternData::Enum(alloc.enum_pattern(EnumPattern { + tag: <>.tag, pattern: None, pos, - }), + })), } }, }; // Unparenthesized or-pattern. -OrPatternUnparens: OrPattern = { +OrPatternUnparens: OrPattern<'ast> = { > => { - let patterns = - patterns.into_iter().chain(std::iter::once(last)).collect(); + // We need to collect in a vector here because the allocator needs an + // exact-sized iterator to know beforehand how much memory it needs to + // reserve. + let patterns : Vec<_> = + patterns.into_iter().chain(iter::once(last)).collect(); OrPattern { - patterns, + patterns: alloc.patterns(patterns), pos: mk_pos(src_id, start, end), } }, }; // Parenthesized or-pattern. -OrPatternParens: OrPattern = { +OrPatternParens: OrPattern<'ast> = { "(" ")", }; // Unrestricted or-pattern, which can be parenthesized or not. -OrPattern: OrPattern = { +OrPattern: OrPattern<'ast> = { OrPatternUnparens, OrPatternParens, } // A binding `ident = ` inside a record pattern. -FieldPattern: FieldPattern = { +FieldPattern: FieldPattern<'ast> = { ?> "=" => FieldPattern { matched_id, @@ -908,30 +941,37 @@ FieldPattern: FieldPattern = { }, }; -// Last field pattern of a record pattern -LastFieldPat: LastPattern = { - FieldPattern => LastPattern::Normal(Box::new(<>)), +// Last field pattern of a record pattern.
+// +// We need this rule (together with `LastElemPat`), which combines a last field +// with a potential ellipsis, because putting the ellipsis in a separate rule AND +// handling the case of zero fields (`{..}`) isn't possible: whether the +// ellipsis needs a "," separator before it depends on the presence of zero or +// more fields, but a stand-alone ellipsis rule has no way to get this +// information about the previous matches. +LastFieldPat: LastPattern> = { + FieldPattern => LastPattern::Normal(<>), ".." => LastPattern::Ellipsis(<>), }; -// Last pattern of an array pattern -LastElemPat: LastPattern = { - Pattern => LastPattern::Normal(Box::new(<>)), +// Last pattern of an array pattern. See `LastFieldPat`. +LastElemPat: LastPattern> = { + Pattern => LastPattern::Normal(<>), ".." => LastPattern::Ellipsis(<>), } // A default annotation in a pattern. -DefaultAnnot: RichTerm = "?" => t; +DefaultAnnot: Ast<'ast> = "?" ; // A metadata keyword returned as an ident. In some positions, those are // considered valid identifiers. See ExtendedIdent below. -MetadataKeyword: LocIdent = { - "doc" => LocIdent::new("doc"), - "default" => LocIdent::new("default"), - "force" => LocIdent::new("force"), - "priority" => LocIdent::new("priority"), - "optional" => LocIdent::new("optional"), - "not_exported" => LocIdent::new("not_exported"), +MetadataKeyword: Ident = { + "doc" => Ident::new("doc"), + "default" => Ident::new("default"), + "force" => Ident::new("force"), + "priority" => Ident::new("priority"), + "optional" => Ident::new("optional"), + "not_exported" => Ident::new("not_exported"), }; // We allow metadata keywords (optional, default, doc, etc.) as field names @@ -943,25 +983,24 @@ MetadataKeyword: LocIdent = { // // Thus, for fields, ExtendedIdent is used in place of Ident. ExtendedIdent: LocIdent = { - WithPos, + SpannedId, Ident, }; // The "or" contextual keyword, parsed as an ident. -IdentOr: LocIdent = "or" => LocIdent::new("or"); +IdentOr: Ident = "or" => Ident::new("or"); // The "as" contextual keyword, parsed as an ident. -IdentAs: LocIdent = "as" => LocIdent::new("as"); - +IdentAs: Ident = "as" => Ident::new("as"); // The set of pure identifiers, which are never keywords in any context. -RestrictedIdent: LocIdent = "identifier" => LocIdent::new(<>); +RestrictedIdent: Ident = "identifier" => Ident::new(<>); // Identifiers allowed everywhere, which includes pure identifiers and contextual // keywords. #[inline] Ident: LocIdent = { - WithPos, - WithPos, - WithPos, + SpannedId, + SpannedId, + SpannedId, }; Bool: bool = { @@ -970,56 +1009,53 @@ Bool: bool = { }; // String-like syntax which supports interpolation. -// Depending on the opening brace, these either parse as strings, or as "symbolic strings", -// which get desugared here to an array of terms. -StrChunks: RichTerm = { - => { +// +// Depending on the opening brace, these either parse as strings, or as +// "symbolic strings", which get desugared here to an array of terms. +StringChunks: Node<'ast> = { + // The lexer emits a stream of groups of `ChunkTerm` interspersed by one + // `ChunkLiteral`: consecutive chunk literals are fused by the lexer.
+ => { debug_assert!( start.is_closed_by(&end), "Fatal parser error: a string starting with {start:?} should never be closed by {end:?}" ); - let chunks: Vec> = fst.into_iter() - .map(StrChunk::Literal) + let mut chunks: Vec>> = fst.into_iter() + .map(StringChunk::Literal) .chain(chunks.into_iter() .map(|(mut es, s)| { - es.push(StrChunk::Literal(s)); + es.push(StringChunk::Literal(s)); es }) .flatten()) .chain(lasts.into_iter()) .collect(); - let chunks = if start.needs_strip_indent() { - strip_indent(chunks) - } else { - chunks - }; + if start.needs_strip_indent() { + strip_indent(&mut chunks); + } + // In the case of symbolic strings, we don't produce a string (in + // practice, string chunks). The chunks are reified to a Nickel array + // and wrapped in a record instead. if let StringStartDelimiter::Symbolic(prefix) = start { let terms = chunks.into_iter().map(|chunk| match chunk { - StrChunk::Literal(_) => Term::StrChunks(vec![chunk]).into(), - StrChunk::Expr(e, _) => e, - }).collect(); - - RichTerm::from(build_record([ - ( - FieldPathElem::Ident("tag".into()), - Field::from(RichTerm::from(Term::Enum("SymbolicString".into()))) - ), - ( - FieldPathElem::Ident("prefix".into()), - Field::from(RichTerm::from(Term::Enum(prefix.into()))) - ), - ( - FieldPathElem::Ident("fragments".into()), - Field::from(RichTerm::from(Term::Array(terms, Default::default()))) - ) - ], Default::default())) + StringChunk::Literal(_) => alloc.string_chunks(iter::once(chunk)).into(), + StringChunk::Expr(e, _) => e, + }); + + builder::Record::new() + .field("tag") + .value(alloc, builder::enum_tag("SymbolicString")) + .field("prefix") + .value(alloc, builder::enum_tag(prefix)) + .field("fragments") + .value(alloc, alloc.array(terms)) + .build(alloc) + .node } else { - let mut chunks = chunks; - chunks.reverse(); - RichTerm::from(Term::StrChunks(chunks)) + alloc.string_chunks(chunks) } }, }; @@ -1047,36 +1083,50 @@ ChunkLiteral : String = }) }; -ChunkExpr: StrChunk = Interpolation > "}" => StrChunk::Expr(t, 0); +// An interpolated expression in a string: `%{}`. +ChunkTerm: StringChunk> = Interpolation "}" => StringChunk::Expr(<>, 0); +// The opening sequence of string interpolation. Interpolation = { "%{", "multstr %{" }; // A construct which looks like a string, but is generic over its delimiters. // Used to implement `StaticString` as well as `StringEnumTag`. DelimitedStaticString: String = Start End => s.unwrap_or_default(); +// A static string using the basic string syntax (delimited by double quotes). StandardStaticString = DelimitedStaticString<"\"", "\"">; +// A static string using the multiline string syntax. MultilineStaticString: String = DelimitedStaticString<"m%\"","\"%"> => { // strip the common indentation prefix - let chunks: Vec> = vec![StrChunk::Literal(<>)]; - match strip_indent(chunks).pop().unwrap() { - StrChunk::Literal(s) => s, - // We build + let mut chunks = vec![StringChunk::Literal(<>)]; + strip_indent(&mut chunks); + + // unwrap(): we created the vector just above with exactly one element, and + // `strip_indent` doesn't change the size of its vector argument, so there's + // still exactly one element + match chunks.pop().unwrap() { + StringChunk::Literal(s) => s, + // unreachable: we built the only element as a `StringChunk::Literal`, + // and `strip_indent` doesn't change the nature of chunks, so the only + // element can't be anything else (an expression) _ => unreachable!(), } }; -StaticString : String = { +// A string which must be known statically without having to run the program.
In +// practice, it's a string where interpolation isn't allowed. +StaticString: String = { StandardStaticString, MultilineStaticString, } +// A quoted enum tag, which can contain spaces or other special characters. StringEnumTag = DelimitedStaticString<"'\"", "\"">; EnumTag: LocIdent = { "raw enum tag" => <>.into(), - => <>.into(), + StringEnumTag => <>.into(), }; ChunkLiteralPart: ChunkLiteralPart = { @@ -1085,46 +1135,47 @@ ChunkLiteralPart: ChunkLiteralPart = { "str esc char" => ChunkLiteralPart::Char(<>), }; -UOp: UnaryOp = { - "typeof" => UnaryOp::Typeof, - "blame" => UnaryOp::Blame, - "label/flip_polarity" => UnaryOp::LabelFlipPol, - "label/polarity" => UnaryOp::LabelPol, - "label/go_dom" => UnaryOp::LabelGoDom, - "label/go_codom" => UnaryOp::LabelGoCodom, - "label/go_array" => UnaryOp::LabelGoArray, - "label/go_dict" => UnaryOp::LabelGoDict, - "enum/embed" => UnaryOp::EnumEmbed(<>), - "array/map" => UnaryOp::ArrayMap, - "array/generate" => UnaryOp::ArrayGen, - "record/map" => UnaryOp::RecordMap, - "seq" => UnaryOp::Seq, - "deep_seq" => UnaryOp::DeepSeq, - "op force" => UnaryOp::Force{ ignore_not_exported: false }, - "array/length" => UnaryOp::ArrayLength, - "record/fields" => UnaryOp::RecordFields(RecordOpKind::IgnoreEmptyOpt), - "record/fields_with_opts" => UnaryOp::RecordFields(RecordOpKind::ConsiderAllFields), - "record/values" => UnaryOp::RecordValues, - "string/trim" => UnaryOp::StringTrim, - "string/chars" => UnaryOp::StringChars, - "string/uppercase" => UnaryOp::StringUppercase, - "string/lowercase" => UnaryOp::StringLowercase, - "string/length" => UnaryOp::StringLength, - "to_string" => UnaryOp::ToString, - "number/from_string" => UnaryOp::NumberFromString, - "enum/from_string" => UnaryOp::EnumFromString, - "string/is_match" => UnaryOp::StringIsMatch, - "string/find" => UnaryOp::StringFind, - "string/find_all" => UnaryOp::StringFindAll, - "op rec_force" => UnaryOp::RecForce, - "op rec_default" => UnaryOp::RecDefault, - "record/empty_with_tail" => UnaryOp::RecordEmptyWithTail, - "trace" => UnaryOp::Trace, - "label/push_diag" => UnaryOp::LabelPushDiag, +UOp: PrimOp = { + "typeof" => PrimOp::Typeof, + "blame" => PrimOp::Blame, + "label/flip_polarity" => PrimOp::LabelFlipPol, + "label/polarity" => PrimOp::LabelPol, + "label/go_dom" => PrimOp::LabelGoDom, + "label/go_codom" => PrimOp::LabelGoCodom, + "label/go_array" => PrimOp::LabelGoArray, + "label/go_dict" => PrimOp::LabelGoDict, + "enum/embed" => PrimOp::EnumEmbed(<>), + "array/map" => PrimOp::ArrayMap, + "array/generate" => PrimOp::ArrayGen, + "record/map" => PrimOp::RecordMap, + "seq" => PrimOp::Seq, + "deep_seq" => PrimOp::DeepSeq, + "op force" => PrimOp::Force{ ignore_not_exported: false }, + "array/length" => PrimOp::ArrayLength, + "record/fields" => PrimOp::RecordFields(RecordOpKind::IgnoreEmptyOpt), + "record/fields_with_opts" => PrimOp::RecordFields(RecordOpKind::ConsiderAllFields), + "record/values" => PrimOp::RecordValues, + "string/trim" => PrimOp::StringTrim, + "string/chars" => PrimOp::StringChars, + "string/uppercase" => PrimOp::StringUppercase, + "string/lowercase" => PrimOp::StringLowercase, + "string/length" => PrimOp::StringLength, + "to_string" => PrimOp::ToString, + "number/from_string" => PrimOp::NumberFromString, + "enum/from_string" => PrimOp::EnumFromString, + "string/is_match" => PrimOp::StringIsMatch, + "string/find" => PrimOp::StringFind, + "string/find_all" => PrimOp::StringFindAll, + // Currently recursive priorities are disabled (since 1.2.0). 
+ // "op rec_force" => PrimOp::RecForce, + // "op rec_default" => PrimOp::RecDefault, + "record/empty_with_tail" => PrimOp::RecordEmptyWithTail, + "trace" => PrimOp::Trace, + "label/push_diag" => PrimOp::LabelPushDiag, "eval_nix" =>? { #[cfg(feature = "nix-experimental")] { - Ok(UnaryOp::EvalNix) + Ok(PrimOp::EvalNix) } #[cfg(not(feature = "nix-experimental"))] { @@ -1136,144 +1187,127 @@ UOp: UnaryOp = { }) } }, - "enum/get_arg" => UnaryOp::EnumGetArg, - "enum/make_variant" => UnaryOp::EnumMakeVariant, - "enum/is_variant" => UnaryOp::EnumIsVariant, - "enum/get_tag" => UnaryOp::EnumGetTag, - "contract/custom" => UnaryOp::ContractCustom, - "number/arccos" => UnaryOp::NumberArcCos, - "number/arcsin" => UnaryOp::NumberArcSin, - "number/arctan" => UnaryOp::NumberArcTan, - "number/cos" => UnaryOp::NumberCos, - "number/sin" => UnaryOp::NumberSin, - "number/tan" => UnaryOp::NumberTan, + "enum/get_arg" => PrimOp::EnumGetArg, + "enum/make_variant" => PrimOp::EnumMakeVariant, + "enum/is_variant" => PrimOp::EnumIsVariant, + "enum/get_tag" => PrimOp::EnumGetTag, + "contract/custom" => PrimOp::ContractCustom, + "number/arccos" => PrimOp::NumberArcCos, + "number/arcsin" => PrimOp::NumberArcSin, + "number/arctan" => PrimOp::NumberArcTan, + "number/cos" => PrimOp::NumberCos, + "number/sin" => PrimOp::NumberSin, + "number/tan" => PrimOp::NumberTan, } -PatternGuard: RichTerm = "if" => <>; +PatternGuard: Ast<'ast> = "if" => <>; -MatchBranch: MatchBranch = +MatchBranch: MatchBranch<'ast> = "=>" => MatchBranch { pattern, guard, body}; // Infix operators by precedence levels. Lowest levels take precedence over // highest ones. -InfixBOp2: BinaryOp = { - "++" => BinaryOp::StringConcat, - "@" => BinaryOp::ArrayConcat, +InfixBOp2: PrimOp = { + "++" => PrimOp::StringConcat, + "@" => PrimOp::ArrayConcat, } -InfixBOp3: BinaryOp = { - "*" => BinaryOp::Mult, - "/" => BinaryOp::Div, - "%" => BinaryOp::Modulo, +InfixBOp3: PrimOp = { + "*" => PrimOp::Mult, + "/" => PrimOp::Div, + "%" => PrimOp::Modulo, } -InfixBOp4: BinaryOp = { - "+" => BinaryOp::Plus, - "-" => BinaryOp::Sub, +InfixBOp4: PrimOp = { + "+" => PrimOp::Plus, + "-" => PrimOp::Sub, } -InfixUOp5: UnaryOp = { - "!" => UnaryOp::BoolNot, +InfixUOp5: PrimOp = { + "!" 
=> PrimOp::BoolNot, } -InfixBOp7: BinaryOp = { - "<" => BinaryOp::LessThan, - "<=" => BinaryOp::LessOrEq, - ">" => BinaryOp::GreaterThan, - ">=" => BinaryOp::GreaterOrEq, +InfixBOp6: PrimOp = { + "&" => PrimOp::Merge(MergeKind::Standard), } -InfixBOp8: BinaryOp = { - "==" => BinaryOp::Eq, +InfixBOp7: PrimOp = { + "<" => PrimOp::LessThan, + "<=" => PrimOp::LessOrEq, + ">" => PrimOp::GreaterThan, + ">=" => PrimOp::GreaterOrEq, } -InfixLazyBOp9: UnaryOp = { - "&&" => UnaryOp::BoolAnd, +InfixBOp8: PrimOp = { + "==" => PrimOp::Eq, } -InfixLazyBOp10: UnaryOp = { - "||" => UnaryOp::BoolOr, +InfixLazyBOp9: PrimOp = { + "&&" => PrimOp::BoolAnd, } -InfixBOp: BinaryOp = { +InfixLazyBOp10: PrimOp = { + "||" => PrimOp::BoolOr, +} + +InfixBOp: PrimOp = { InfixBOp2, InfixBOp3, InfixBOp4, + InfixBOp6, InfixBOp7, InfixBOp8, } -InfixUOpOrLazyBOp: UnaryOp = { +InfixUOpOrLazyBOp: PrimOp = { InfixUOp5, InfixLazyBOp9, InfixLazyBOp10, } InfixOp: InfixOp = { - => <>.into(), - => <>.into(), + InfixBOp => InfixOp(<>), + InfixUOpOrLazyBOp => InfixOp(<>), } -CurriedOp: RichTerm = { - => - op.eta_expand(mk_pos(src_id, l, r)), - "&" => - InfixOp::from(BinaryOp::Merge(mk_merge_label(src_id, l, r))) - .eta_expand(mk_pos(src_id, l, r)), - "|>" => - mk_fun!("x1", "x2", - mk_app!(mk_term::var("x2"), mk_term::var("x1")) - .with_pos(mk_pos(src_id, l, r)) - ), - "!=" => - mk_fun!("x1", "x2", - mk_term::op1( - UnaryOp::BoolNot, - Term::Op2(BinaryOp::Eq, - mk_term::var("x1"), - mk_term::var("x2") - ) - ) - .with_pos(mk_pos(src_id, l, r)) - ), - //`foo.bar` is a static - // record access, but when used in a curried form, it's a dynamic record - // access (that is, `(.) foo bar` is `foo."%{bar}"`). It turns out a dynamic - // record access takes the record as the last argument, in the style of the - // stdlib. If we want `(.) foo bar` to be `foo."%{bar}"`, we thus have to - // flip the arguments. - "." => - mk_fun!( - "x1", - "x2", - mk_term::op2( - BinaryOp::RecordGet, - mk_term::var("x2"), - mk_term::var("x1"), - ).with_pos(mk_pos(src_id, l, r)) - ), +EtaExpand: Node<'ast> = => + op.eta_expand(alloc, mk_pos(src_id, l, r)); + +// Infix ops that are desugared away but for which we still need to support the +// curried operator syntax. +ExtendedInfixOp: ExtendedInfixOp = { + "|>" => ExtendedInfixOp::ReverseApp, + "!=" => ExtendedInfixOp::NotEqual, +} + +DotAsInfixOp: InfixOp = "."
=> InfixOp(PrimOp::RecordGet); + +CurriedOp: Node<'ast> = { + EtaExpand, + EtaExpand, + EtaExpand, } -InfixUOpApp: UniTerm = - > => UniTerm::from(mk_term::op1(op, t)); +InfixUOpApp: UniTerm<'ast> = + > => UniTerm::from(alloc.prim_op(op, iter::once(e))); -InfixBOpApp: UniTerm = - > > => - UniTerm::from(mk_term::op2(op, t1, t2)); +InfixBOpApp: UniTerm<'ast> = + > > => + UniTerm::from(primop_app!(alloc, op, e1, e2)); -InfixLazyBOpApp: UniTerm = - > > => - UniTerm::from(mk_app!(mk_term::op1(op, t1), t2)); +InfixLazyBOpApp: UniTerm<'ast> = + > > => + UniTerm::from(app!(alloc, primop_app!(alloc, op, e1), e2)); -InfixExpr: UniTerm = { +InfixExpr: UniTerm<'ast> = { #[precedence(level="0")] Applicative, #[precedence(level="1")] "-" > => - UniTerm::from(mk_term::op2(BinaryOp::Sub, Term::Num(Number::ZERO), <>)), + UniTerm::from(primop_app!(alloc, PrimOp::Sub, alloc.number(Number::ZERO), <>)), #[precedence(level="2")] #[assoc(side="left")] InfixBOpApp, @@ -1288,11 +1322,9 @@ InfixExpr: UniTerm = { InfixUOpApp, #[precedence(level="6")] #[assoc(side="left")] - > "&" > => - UniTerm::from(mk_term::op2(BinaryOp::Merge(mk_merge_label(src_id, l, r)), t1, t2)), - + InfixBOpApp, > "|>" > => - UniTerm::from(mk_app!(t2, t1)), + UniTerm::from(app!(alloc, t2, t1)), #[precedence(level="7")] #[assoc(side="left")] InfixBOpApp, @@ -1301,7 +1333,11 @@ InfixExpr: UniTerm = { InfixBOpApp, > "!=" > => UniTerm::from( - mk_term::op1(UnaryOp::BoolNot, Term::Op2(BinaryOp::Eq, t1, t2)) + primop_app!( + alloc, + PrimOp::BoolNot, + primop_app!(alloc, PrimOp::Eq, t1, t2), + ) ), #[precedence(level="9")] #[assoc(side="left")] @@ -1312,87 +1348,77 @@ InfixExpr: UniTerm = { #[precedence(level="11")] #[assoc(side="right")] > "->" > => - UniTerm::from(Type::from(TypeF::Arrow(Box::new(s), Box::new(t)))), + UniTerm::from(Type::from(TypeF::Arrow(alloc.alloc(s), alloc.alloc(t)))), } -BOpPre: BinaryOp = { - "contract/apply" => BinaryOp::ContractApply, - "contract/check" => BinaryOp::ContractCheck, - "contract/array_lazy_app" => BinaryOp::ContractArrayLazyApp, - "contract/record_lazy_app" => BinaryOp::ContractRecordLazyApp, - "unseal" => BinaryOp::Unseal, - "seal" => BinaryOp::Seal, - "label/go_field" => BinaryOp::LabelGoField, - "record/has_field" => BinaryOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), - "record/has_field_with_opts" => BinaryOp::RecordHasField(RecordOpKind::ConsiderAllFields), - "record/field_is_defined" => BinaryOp::RecordFieldIsDefined(RecordOpKind::IgnoreEmptyOpt), - "record/field_is_defined_with_opts" => BinaryOp::RecordFieldIsDefined(RecordOpKind::ConsiderAllFields), - "array/at" => BinaryOp::ArrayAt, - "hash" => BinaryOp::Hash, - "serialize" => BinaryOp::Serialize, - "deserialize" => BinaryOp::Deserialize, - "number/arctan2" => BinaryOp::NumberArcTan2, - "number/log" => BinaryOp::NumberLog, - "pow" => BinaryOp::Pow, - "string/split" => BinaryOp::StringSplit, - "string/contains" => BinaryOp::StringContains, - "string/compare" => BinaryOp::StringCompare, - "record/insert" => BinaryOp::RecordInsert { - ext_kind: RecordExtKind::WithValue, - metadata: Default::default(), - pending_contracts: Default::default(), - op_kind: RecordOpKind::IgnoreEmptyOpt, - }, - "record/insert_with_opts" => BinaryOp::RecordInsert { - ext_kind: RecordExtKind::WithValue, - metadata: Default::default(), - pending_contracts: Default::default(), - op_kind: RecordOpKind::ConsiderAllFields, - }, - "record/remove" => BinaryOp::RecordRemove(RecordOpKind::IgnoreEmptyOpt), - "record/remove_with_opts" => 
BinaryOp::RecordRemove(RecordOpKind::ConsiderAllFields), - "record/split_pair" => BinaryOp::RecordSplitPair, - "record/disjoint_merge" => BinaryOp::RecordDisjointMerge, - "label/with_message" => BinaryOp::LabelWithMessage, - "label/with_notes" => BinaryOp::LabelWithNotes, - "label/append_note" => BinaryOp::LabelAppendNote, - "label/lookup_type_variable" => BinaryOp::LabelLookupTypeVar, +BOpPre: PrimOp = { + "contract/apply" => PrimOp::ContractApply, + "contract/check" => PrimOp::ContractCheck, + "contract/array_lazy_app" => PrimOp::ContractArrayLazyApp, + "contract/record_lazy_app" => PrimOp::ContractRecordLazyApp, + "seal" => PrimOp::Seal, + "unseal" => PrimOp::Unseal, + "label/go_field" => PrimOp::LabelGoField, + "record/has_field" => PrimOp::RecordHasField(RecordOpKind::IgnoreEmptyOpt), + "record/has_field_with_opts" => PrimOp::RecordHasField(RecordOpKind::ConsiderAllFields), + "record/field_is_defined" => PrimOp::RecordFieldIsDefined(RecordOpKind::IgnoreEmptyOpt), + "record/field_is_defined_with_opts" => PrimOp::RecordFieldIsDefined(RecordOpKind::ConsiderAllFields), + "array/at" => PrimOp::ArrayAt, + "hash" => PrimOp::Hash, + "serialize" => PrimOp::Serialize, + "deserialize" => PrimOp::Deserialize, + "number/arctan2" => PrimOp::NumberArcTan2, + "number/log" => PrimOp::NumberLog, + "pow" => PrimOp::Pow, + "string/split" => PrimOp::StringSplit, + "string/contains" => PrimOp::StringContains, + "string/compare" => PrimOp::StringCompare, + "record/insert" => PrimOp::RecordInsert(RecordOpKind::IgnoreEmptyOpt), + "record/insert_with_opts" => PrimOp::RecordInsert(RecordOpKind::ConsiderAllFields), + "record/remove" => PrimOp::RecordRemove(RecordOpKind::IgnoreEmptyOpt), + "record/remove_with_opts" => PrimOp::RecordRemove(RecordOpKind::ConsiderAllFields), + "record/split_pair" => PrimOp::RecordSplitPair, + "record/disjoint_merge" => PrimOp::RecordDisjointMerge, + "label/with_message" => PrimOp::LabelWithMessage, + "label/with_notes" => PrimOp::LabelWithNotes, + "label/append_note" => PrimOp::LabelAppendNote, + "label/lookup_type_variable" => PrimOp::LabelLookupTypeVar, } -NOpPre: UniTerm = { +NOpPre: UniTerm<'ast> = { "string/replace" => - UniTerm::from(mk_opn!(NAryOp::StringReplace, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::StringReplace, t1, t2, t3)), "string/replace_regex" => - UniTerm::from(mk_opn!(NAryOp::StringReplaceRegex, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::StringReplaceRegex, t1, t2, t3)), "string/substr" => - UniTerm::from(mk_opn!(NAryOp::StringSubstr, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::StringSubstr, t1, t2, t3)), "record/seal_tail" => - UniTerm::from(mk_opn!(NAryOp::RecordSealTail, t1, t2, t3, t4)), + UniTerm::from(primop_app!(alloc, PrimOp::RecordSealTail, t1, t2, t3, t4)), "record/unseal_tail" => - UniTerm::from(mk_opn!(NAryOp::RecordUnsealTail, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::RecordUnsealTail, t1, t2, t3)), "label/insert_type_variable" => - UniTerm::from(mk_opn!(NAryOp::LabelInsertTypeVar, key, pol, label)), + UniTerm::from(primop_app!(alloc, PrimOp::LabelInsertTypeVar, key, pol, label)), "array/slice" => - UniTerm::from(mk_opn!(NAryOp::ArraySlice, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::ArraySlice, t1, t2, t3)), "record/merge_contract" => - UniTerm::from(mk_opn!(NAryOp::MergeContract, t1, t2, t3)), + UniTerm::from(primop_app!(alloc, PrimOp::MergeContract, t1, t2, t3)), } -TypeBuiltin: Type = { - "Dyn" => Type::from(TypeF::Dyn), - "Number" => Type::from(TypeF::Number), - "Bool" => 
Type::from(TypeF::Bool), - "String" => Type::from(TypeF::String), +TypeBuiltin: TypeUnr<'ast> = { + "Dyn" => TypeF::Dyn, + "Number" => TypeF::Number, + "Bool" => TypeF::Bool, + "String" => TypeF::String, } -TypeEnumRow: EnumRow = )?> => { +TypeEnumRow: EnumRow<'ast> = )?> => { EnumRow { id, - typ: typ.map(Box::new), + typ: typ.map(|ty| alloc.alloc(ty)), } }; -TypeEnum: Type = "[|" ",")*> )?> )?> "|]" => { +TypeEnum: TypeUnr<'ast> = "[|" ",")*> )?> )?> "|]" => { let ty = rows.into_iter() .chain(last.into_iter()) // As we build row types as a linked list via a fold on the original @@ -1401,31 +1427,30 @@ TypeEnum: Type = "[|" ",")*> )?> EnumRowsF::TailVar(id), - None => EnumRowsF::Empty, - } - ), + match tail { + Some(id) => EnumRowsF::TailVar(id), + None => EnumRowsF::Empty, + } + , |erows, row| { - EnumRows(EnumRowsF::Extend { + EnumRowsF::Extend { row, - tail: Box::new(erows) - }) + tail: alloc.enum_rows(erows) + } } ); - Type::from(TypeF::Enum(ty)) + TypeF::Enum(EnumRows(ty)) }; -TypeAtom: Type = { - , - , - "{" "_" ":" > "}" => { - Type::from(TypeF::Dict { - type_fields: Box::new(t), +TypeAtom: TypeUnr<'ast> = { + TypeBuiltin, + TypeEnum, + "{" "_" ":" "}" => { + TypeF::Dict { + type_fields: alloc.alloc(<>), flavour: DictTypeFlavour::Type - }) + } }, // Although dictionary contracts aren't really types, we treat them as // types for now - at least syntactically - as they are represented using a @@ -1440,16 +1465,17 @@ TypeAtom: Type = { // right away inside the dictionary contract (before the enclosing `forall` // is fixed) will indeed turn it into a term variable, and raise an unbound // type variable error. - "{" "_" "|" > "}" => { - Type::from(TypeF::Dict { - type_fields: Box::new(t), + "{" "_" "|" "}" => { + TypeF::Dict { + type_fields: alloc.alloc(<>), flavour: DictTypeFlavour::Contract - }) + } }, "_" => { let id = *next_wildcard_id; *next_wildcard_id += 1; - Type::from(TypeF::Wildcard(id)) + + TypeF::Wildcard(id) }, } diff --git a/core/src/parser/mod.rs b/core/src/parser/mod.rs index d0a7279a3c..7d7b762205 100644 --- a/core/src/parser/mod.rs +++ b/core/src/parser/mod.rs @@ -1,9 +1,13 @@ +use crate::bytecode::ast::{ + compat::{FromAst, ToMainline}, + typ::Type, + Ast, AstAlloc, +}; use crate::error::{ParseError, ParseErrors}; use crate::files::FileId; use crate::identifier::LocIdent; +use crate::metrics; use crate::position::RawSpan; -use crate::term::RichTerm; -use crate::typ::Type; use lalrpop_util::lalrpop_mod; lalrpop_mod!( @@ -16,23 +20,25 @@ use grammar::__ToTriple; pub mod error; pub mod lexer; -pub mod uniterm; +pub(crate) mod uniterm; pub mod utils; #[cfg(test)] mod tests; -/// Either a term or a toplevel let declaration. +/// Either an expression or a toplevel let declaration. +/// /// Used exclusively in the REPL to allow the defining of variables without having to specify `in`. -/// For instance: +/// For example: +/// /// ```text /// nickel>let foo = 1 /// nickel>foo /// 1 /// ``` -pub enum ExtendedTerm { - RichTerm(RichTerm), - ToplevelLet(LocIdent, RichTerm), +pub enum ExtendedTerm { + Term(T), + ToplevelLet(LocIdent, T), } // The interface of LALRPOP-generated parsers, for each public rule. This trait is used as a facade @@ -41,9 +47,12 @@ pub enum ExtendedTerm { // of this module, if we don't want our implementation to be coupled to LALRPOP details. // // The type of `parse` was just copy-pasted from the generated code of LALRPOP. 
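(For orientation — a hedged sketch, not part of the patch: with the facade described above and the `ErrorTolerantParser` trait that follows, a caller now owns the arena and threads it through the parse. The `Files`/`Lexer` construction mirrors the test module changes further down in this diff; `parse_example` is a made-up helper.)

```rust
// Sketch: driving the arena-based parser. The caller creates the `AstAlloc`
// and must keep it alive for as long as the returned `Ast<'ast>` is used.
fn parse_example<'ast>(alloc: &'ast AstAlloc) -> Result<Ast<'ast>, ParseErrors> {
    let src = "{ foo = 1 }";
    let file_id = Files::new().add("<example>", String::from(src));

    grammar::TermParser::new().parse_strict(alloc, file_id, lexer::Lexer::new(src))
}
```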
-trait LalrpopParser { +// TODO: We could avoid having those pesky `'ast` lifetimes at the top-level of every trait using +// generic associated types, but it's not entirely trivial - to investigate. +trait LalrpopParser<'ast, T> { fn parse<'input, 'err, 'wcard, __TOKEN, __TOKENS>( &self, + alloc: &'ast AstAlloc, src_id: FileId, errors: &'err mut Vec< lalrpop_util::ErrorRecovery, self::error::ParseError>, >, next_wildcard_id: &'wcard mut usize, __tokens0: __TOKENS, ) -> Result, self::error::ParseError>> where - __TOKEN: __ToTriple<'input, 'err, 'wcard>, + __TOKEN: __ToTriple<'input, 'ast, 'err, 'wcard>, __TOKENS: IntoIterator; } @@ -60,9 +69,10 @@ trait LalrpopParser { /// LALRPOP. macro_rules! generate_lalrpop_parser_impl { ($parser:ty, $output:ty) => { - impl LalrpopParser<$output> for $parser { + impl<'ast> LalrpopParser<'ast, $output> for $parser { fn parse<'input, 'err, 'wcard, __TOKEN, __TOKENS>( &self, + alloc: &'ast AstAlloc, src_id: FileId, errors: &'err mut Vec< lalrpop_util::ErrorRecovery< @@ -78,54 +88,67 @@ macro_rules! generate_lalrpop_parser_impl { lalrpop_util::ParseError, self::error::ParseError>, > where - __TOKEN: __ToTriple<'input, 'err, 'wcard>, + __TOKEN: __ToTriple<'input, 'ast, 'err, 'wcard>, __TOKENS: IntoIterator, { - Self::parse(self, src_id, errors, next_wildcard_id, __tokens0) + Self::parse(self, alloc, src_id, errors, next_wildcard_id, __tokens0) } } }; } -generate_lalrpop_parser_impl!(grammar::ExtendedTermParser, ExtendedTerm); -generate_lalrpop_parser_impl!(grammar::TermParser, RichTerm); -generate_lalrpop_parser_impl!(grammar::FixedTypeParser, Type); +generate_lalrpop_parser_impl!(grammar::ExtendedTermParser, ExtendedTerm>); +generate_lalrpop_parser_impl!(grammar::TermParser, Ast<'ast>); +generate_lalrpop_parser_impl!(grammar::FixedTypeParser, Type<'ast>); generate_lalrpop_parser_impl!(grammar::StaticFieldPathParser, Vec); generate_lalrpop_parser_impl!( grammar::CliFieldAssignmentParser, - (Vec, RichTerm, RawSpan) + (Vec, Ast<'ast>, RawSpan) ); -/// Generic interface of the various specialized Nickel parsers. +/// General interface of the various specialized Nickel parsers. /// /// `T` is the product of the parser (a term, a type, etc.). -pub trait ErrorTolerantParser { +pub trait ErrorTolerantParser<'ast, T> { /// Parse a value from a lexer with the given `file_id` in an error-tolerant way. This method /// can still fail for non-recoverable errors. fn parse_tolerant( &self, + alloc: &'ast AstAlloc, file_id: FileId, lexer: lexer::Lexer, ) -> Result<(T, ParseErrors), ParseError>; /// Parse a value from a lexer with the given `file_id`, failing at the first encountered /// error.
- fn parse_strict(&self, file_id: FileId, lexer: lexer::Lexer) -> Result; + fn parse_strict( + &self, + alloc: &'ast AstAlloc, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result; } -impl ErrorTolerantParser for P +impl<'ast, T, P> ErrorTolerantParser<'ast, T> for P where - P: LalrpopParser, + P: LalrpopParser<'ast, T>, { fn parse_tolerant( &self, + alloc: &'ast AstAlloc, file_id: FileId, lexer: lexer::Lexer, ) -> Result<(T, ParseErrors), ParseError> { let mut parse_errors = Vec::new(); let mut next_wildcard_id = 0; let result = self - .parse(file_id, &mut parse_errors, &mut next_wildcard_id, lexer) + .parse( + alloc, + file_id, + &mut parse_errors, + &mut next_wildcard_id, + lexer, + ) .map_err(|err| ParseError::from_lalrpop(err, file_id)); let parse_errors = ParseErrors::from_recoverable(parse_errors, file_id); @@ -135,11 +158,127 @@ where } } - fn parse_strict(&self, file_id: FileId, lexer: lexer::Lexer) -> Result { - match self.parse_tolerant(file_id, lexer) { + fn parse_strict( + &self, + alloc: &'ast AstAlloc, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result { + match self.parse_tolerant(alloc, file_id, lexer) { Ok((t, e)) if e.no_errors() => Ok(t), Ok((_, e)) => Err(e), Err(e) => Err(e.into()), } } } + +/// General interface of the various specialized Nickel parsers. +/// +/// This trait is a compatibility layer version of [ErrorTolerantParser]. It produces data of the +/// old, mainline types because the current pipeline still depends on them (defined in +/// [crate::term]). Eventually we'll get rid of it and only use [ErrorTolerantParser], which +/// produces the new AST instead. +pub trait ErrorTolerantParserCompat { + /// Parse a value from a lexer with the given `file_id` in an error-tolerant way. This method + /// can still fail for non-recoverable errors. + fn parse_tolerant_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<(T, ParseErrors), ParseError>; + + /// Parse a value from a lexer with the given `file_id`, failing at the first encountered + /// error. + fn parse_strict_compat(&self, file_id: FileId, lexer: lexer::Lexer) -> Result; +} + +impl<'ast> FromAst>> for ExtendedTerm { + fn from_ast(ast: &ExtendedTerm>) -> Self { + match ast { + ExtendedTerm::Term(t) => ExtendedTerm::Term(t.to_mainline()), + ExtendedTerm::ToplevelLet(ident, t) => { + ExtendedTerm::ToplevelLet(*ident, t.to_mainline()) + } + } + } +} + +// Generate boilerplate impls to produce legacy mainline types from the available parsers. +macro_rules!
generate_compat_impl { ($parser:ty, $output:ty) => { + impl ErrorTolerantParserCompat<$output> for $parser { + fn parse_tolerant_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<($output, ParseErrors), ParseError> { + let alloc = AstAlloc::new(); + self.parse_tolerant(&alloc, file_id, lexer).map(|(t, e)| { + ( + metrics::measure_runtime!("runtime:ast_conversion", t.to_mainline()), + e, + ) + }) + } + + fn parse_strict_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<$output, ParseErrors> { + let alloc = AstAlloc::new(); + self.parse_strict(&alloc, file_id, lexer) + .map(|t| metrics::measure_runtime!("runtime:ast_conversion", t.to_mainline())) + } + } + }; +} + +generate_compat_impl!( + grammar::ExtendedTermParser, + ExtendedTerm +); +generate_compat_impl!(grammar::TermParser, crate::term::RichTerm); +generate_compat_impl!(grammar::FixedTypeParser, crate::typ::Type); + +impl ErrorTolerantParserCompat<(Vec, crate::term::RichTerm, RawSpan)> + for grammar::CliFieldAssignmentParser +{ + fn parse_tolerant_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<((Vec, crate::term::RichTerm, RawSpan), ParseErrors), ParseError> { + self.parse_tolerant(&AstAlloc::new(), file_id, lexer) + .map(|((path, term, span), e)| ((path, term.to_mainline(), span), e)) + } + + fn parse_strict_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<(Vec, crate::term::RichTerm, RawSpan), ParseErrors> { + self.parse_strict(&AstAlloc::new(), file_id, lexer) + .map(|(path, term, span)| (path, term.to_mainline(), span)) + } +} + +// This implementation doesn't do any conversion, but hides away the (useless, in this case) +// [crate::bytecode::ast::AstAlloc] parameter. +impl ErrorTolerantParserCompat> for grammar::StaticFieldPathParser { + fn parse_tolerant_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result<(Vec, ParseErrors), ParseError> { + self.parse_tolerant(&AstAlloc::new(), file_id, lexer) + } + + fn parse_strict_compat( + &self, + file_id: FileId, + lexer: lexer::Lexer, + ) -> Result, ParseErrors> { + self.parse_strict(&AstAlloc::new(), file_id, lexer) + } +} diff --git a/core/src/parser/tests.rs b/core/src/parser/tests.rs index 952a77bdeb..2ea3a00d87 100644 --- a/core/src/parser/tests.rs +++ b/core/src/parser/tests.rs @@ -1,9 +1,8 @@ use super::lexer::{Lexer, MultiStringToken, NormalToken, StringToken, SymbolicStringStart, Token}; -use super::utils::{build_record, FieldPathElem}; use crate::error::ParseError; use crate::files::Files; use crate::identifier::LocIdent; -use crate::parser::{error::ParseError as InternalParseError, ErrorTolerantParser}; +use crate::parser::{error::ParseError as InternalParseError, ErrorTolerantParserCompat}; use crate::term::Number; use crate::term::Term::*; use crate::term::{make as mk_term, Term}; @@ -16,7 +15,7 @@ fn parse(s: &str) -> Result { let id = Files::new().add("", String::from(s)); super::grammar::TermParser::new() - .parse_strict(id, Lexer::new(s)) + .parse_strict_compat(id, Lexer::new(s)) .map_err(|errs| errs.errors.first().unwrap().clone()) } @@ -38,29 +37,38 @@ fn mk_single_chunk(s: &str) -> RichTerm { } fn mk_symbolic_single_chunk(prefix: &str, s: &str) -> RichTerm { - use crate::term::record::Field; - - build_record( - [ - ( - FieldPathElem::Ident("tag".into()), - Field::from(RichTerm::from(Term::Enum("SymbolicString".into()))), - ), - ( - FieldPathElem::Ident("prefix".into()), - Field::from(RichTerm::from(Term::Enum(prefix.into()))), - ), - (
FieldPathElem::Ident("fragments".into()), - Field::from(RichTerm::from(Array( - std::iter::once(mk_single_chunk(s)).collect(), - Default::default(), - ))), - ), - ], - Default::default(), - ) - .into() + use crate::term::{make::builder, SharedTerm}; + + let mut result: RichTerm = builder::Record::new() + .field("tag") + .value(Term::Enum("SymbolicString".into())) + .field("prefix") + .value(Term::Enum(prefix.into())) + .field("fragments") + .value(Array( + std::iter::once(mk_single_chunk(s)).collect(), + Default::default(), + )) + .into(); + + // The builder interface is nice, but it produces non recursive records. Since the new AST + // symbolic string chunks produce recursive records (they're not really recursive, but there's + // no distinction in the source syntax, and it gets translated to a `RecRecord` by default). + // + // We hack around it by "peeling off" the outer record layer and replacing it with a recursive + // record. + + let term_mut = SharedTerm::make_mut(&mut result.term); + let content = std::mem::replace(term_mut, Term::Null); + + if let Term::Record(data) = content { + *term_mut = RecRecord(data, Vec::new(), None); + result + } else { + unreachable!( + "record was built using Record::builder, expected a record term, got something else" + ) + } } #[test] diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 58ac82960c..ddd3ec3157 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -2,26 +2,23 @@ use super::{error::InvalidRecordTypeError, *}; use error::ParseError; use indexmap::{map::Entry, IndexMap}; -use utils::{build_record, FieldDef, FieldPathElem}; use crate::{ + bytecode::ast::{ + self, + record::{FieldDef, FieldMetadata, FieldPathElem}, + typ::{EnumRow, EnumRows, RecordRow, RecordRows, Type}, + Annotation, Ast, AstAlloc, MergePriority, Node, + }, environment::Environment, identifier::Ident, position::{RawSpan, TermPos}, - term::{ - record::{Field, FieldMetadata, RecordAttrs}, - LabeledType, MergePriority, RichTerm, Term, TypeAnnotation, - }, - typ::{ - DictTypeFlavour, EnumRows, EnumRowsF, RecordRow, RecordRows, RecordRowsF, Type, TypeF, - VarKind, - }, + typ::{DictTypeFlavour, EnumRowsF, RecordRowsF, TypeF, VarKind}, }; use std::{ cell::RefCell, collections::{HashMap, HashSet}, - convert::TryFrom, }; /// A node of the uniterm AST. We only define new variants for those constructs that are common to @@ -48,25 +45,25 @@ use std::{ /// it here). If, on the other hand, we enter the rule for an infix operator as in `a + 1`, `a` will /// be converted to a `Term::Var` and the resulting uniterm will be /// `UniTermNode::Term(Term::Op2(..))`. -pub enum UniTermNode { +pub enum UniTermNode<'ast> { /// A variable. Can refer both to a term variable or a type variable. Var(LocIdent), /// A record. Can refer both to a record literal or a record type. - Record(UniRecord), + Record(UniRecord<'ast>), /// A uniterm that has been determined to be a term. - Term(RichTerm), + Term(Ast<'ast>), /// A uniterm that has been determined to be a type. - Type(Type), + Type(Type<'ast>), } /// A uniterm with positional information. 
-pub struct UniTerm { - node: UniTermNode, +pub struct UniTerm<'ast> { + node: UniTermNode<'ast>, pos: TermPos, } -impl From for UniTerm { - fn from(node: UniTermNode) -> Self { +impl<'ast> From> for UniTerm<'ast> { + fn from(node: UniTermNode<'ast>) -> Self { UniTerm { node, pos: TermPos::None, @@ -74,96 +71,117 @@ impl From for UniTerm { } } -impl UniTerm { +impl<'ast> UniTerm<'ast> { pub fn with_pos(mut self, pos: TermPos) -> Self { self.pos = pos; self } } +/// Similar to `TryFrom`, but takes an additional allocator for conversions from and to +/// [crate::bytecode::ast::Ast], which require threading an explicit allocator. +/// +/// We chose a different name than `try_from` for the method - although it has a different +/// signature from the standard `TryFrom` (two arguments vs one) - to avoid confusing the compiler +/// which would otherwise have difficulties disambiguating calls like `Ast::try_from`. +pub(crate) trait TryConvert<'ast, T> +where + Self: Sized, +{ + type Error; + + fn try_convert(alloc: &'ast AstAlloc, from: T) -> Result; +} + // For nodes such as `Type` or `Record`, the following implementation has to choose between two // positions to use: the one of the wrapping `UniTerm`, and the one stored inside the `RichTerm` or -// the `Type`. This implementation assumes that the latest set is the one of `UniTerm`, which is the -// single source of truth. -impl TryFrom for Type { +// the `Type`. This implementation assumes that the position set last is the one of `UniTerm`, which +// is the single source of truth. In fact, it happens that only the outermost uniterm position is set +// while the innermost is still `TermPos::None`. +impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Type<'ast> { type Error = ParseError; - fn try_from(ut: UniTerm) -> Result { - let ty_without_pos = match ut.node { - UniTermNode::Var(id) => Type::from(TypeF::Var(id.ident())), - UniTermNode::Record(r) => Type::try_from(r)?, - UniTermNode::Type(ty) => ty, - UniTermNode::Term(rt) => { + fn try_convert(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { + let pos = ut.pos; + + let typ = match ut.node { + UniTermNode::Var(id) => TypeF::Var(id.ident()), + UniTermNode::Record(r) => Type::try_convert(alloc, r)?.typ, + UniTermNode::Type(ty) => ty.typ, + UniTermNode::Term(ast) => { if matches!( - rt.as_ref(), - Term::Null - | Term::Bool(_) - | Term::Num(_) - | Term::Str(_) - | Term::Array(..) - | Term::Enum(_) - | Term::EnumVariant { .. } - | Term::StrChunks(..) + ast.node, + Node::Null + | Node::Bool(_) + | Node::Number(_) + | Node::String(_) + | Node::Array(_) + | Node::EnumVariant { ..
} + | Node::StringChunks(_) ) { //unwrap(): uniterms are supposed to come from the parser, and thus have a //well-defined position return Err(ParseError::InvalidContract(ut.pos.unwrap())); } - Type::from(TypeF::Contract(rt)) + TypeF::Contract(alloc.ast(Ast { + node: ast.node, + pos, + })) } }; - Ok(ty_without_pos.with_pos(ut.pos)) + Ok(Type { typ, pos }) } } -impl TryFrom for RichTerm { +impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Ast<'ast> { type Error = ParseError; - fn try_from(ut: UniTerm) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result { let UniTerm { node, pos } = ut; - let rt = match node { - UniTermNode::Var(id) => RichTerm::new(Term::Var(id), pos), - UniTermNode::Record(r) => RichTerm::try_from(r)?, - UniTermNode::Type(mut typ) => { - typ.fix_type_vars(pos.unwrap())?; - if let TypeF::Contract(rt) = typ.typ { - rt.with_pos(pos) - } else { - let contract = typ - .contract() - .map_err(|err| ParseError::UnboundTypeVariables(vec![err.0]))?; - RichTerm::new(Term::Type { typ, contract }, pos) + let node = match node { + UniTermNode::Var(id) => Node::Var(id), + UniTermNode::Record(r) => Ast::try_convert(alloc, r)?.node, + UniTermNode::Type(typ) => { + let typ = typ.fix_type_vars(alloc, pos.unwrap())?; + + if let TypeF::Contract(ctr) = typ.typ { + ctr.node.clone() + } else { + alloc.typ(typ) } } - UniTermNode::Term(rt) => rt, + UniTermNode::Term(ast) => ast.node, }; - Ok(rt.with_pos(pos)) + Ok(Ast { node, pos }) } } -impl From for UniTerm { - fn from(rt: RichTerm) -> Self { - let pos = rt.pos; +impl<'ast> From> for UniTerm<'ast> { + fn from(ast: Ast<'ast>) -> Self { + let pos = ast.pos; UniTerm { - node: UniTermNode::Term(rt), + node: UniTermNode::Term(ast), pos, } } } -impl From for UniTerm { - fn from(t: Term) -> Self { - Self::from(RichTerm::from(t)) +impl<'ast> From> for UniTerm<'ast> { + fn from(node: Node<'ast>) -> Self { + UniTerm { + node: UniTermNode::Term(node.into()), + pos: TermPos::None, + } } } -impl From for UniTerm { - fn from(ty: Type) -> Self { +impl<'ast> From> for UniTerm<'ast> { + fn from(ty: Type<'ast>) -> Self { let pos = ty.pos; UniTerm { node: UniTermNode::Type(ty), @@ -172,8 +190,8 @@ impl From for UniTerm { } } -impl From for UniTerm { - fn from(ur: UniRecord) -> Self { +impl<'ast> From> for UniTerm<'ast> { + fn from(ur: UniRecord<'ast>) -> Self { let pos = ur.pos; UniTerm { @@ -183,19 +201,30 @@ impl From for UniTerm { } } +impl<'ast, T, U> TryConvert<'ast, T> for U +where + U: TryFrom, +{ + type Error = U::Error; + + fn try_convert(_: &AstAlloc, from: T) -> Result { + U::try_from(from) + } +} + /// A record in the `UniTerm` syntax. #[derive(Clone)] -pub struct UniRecord { - pub fields: Vec, - pub tail: Option<(RecordRows, TermPos)>, - pub attrs: RecordAttrs, +pub struct UniRecord<'ast> { + pub fields: Vec>, + pub tail: Option<(RecordRows<'ast>, TermPos)>, + pub open: bool, pub pos: TermPos, /// The position of the final ellipsis `..`, if any. Used for error reporting. `pos_ellipsis` /// must be different from `TermPos::None` if and only if `attrs.open` is `true`. pub pos_ellipsis: TermPos, } -impl UniRecord { +impl<'ast> UniRecord<'ast> { /// Check if a field definition has a type annotation but no definition. This is currently /// forbidden for record literals that aren't record types. In that case, raise the /// corresponding parse error. 
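(A quick illustration of the blanket impl just above — hedged, with made-up types: any plain `TryFrom` conversion automatically satisfies the allocator-threading interface, the `AstAlloc` argument being simply ignored. This is also why the method is named `try_convert` rather than `try_from`: the call below stays unambiguous.)

```rust
// `Byte` and the `i64 -> Byte` conversion are made-up stand-ins.
struct Byte(u8);

impl TryFrom<i64> for Byte {
    type Error = std::num::TryFromIntError;

    fn try_from(n: i64) -> Result<Self, Self::Error> {
        u8::try_from(n).map(Byte)
    }
}

// Resolves through the blanket `TryConvert` impl for `U: TryFrom<T>`;
// no dedicated `TryConvert` impl is needed.
fn demo(alloc: &AstAlloc) -> Result<Byte, std::num::TryFromIntError> {
    Byte::try_convert(alloc, 300) // fails: 300 doesn't fit in a u8
}
```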
@@ -241,16 +270,13 @@ impl UniRecord { let first_without_def = self.fields.iter().find_map(|field_def| { let path_as_ident = field_def.path_as_ident(); - match &field_def.field { - Field { + match &field_def { + FieldDef { + path: _, value: None, metadata: FieldMetadata { - annotation: - TypeAnnotation { - typ: Some(labeled_ty), - .. - }, + annotation: Annotation { typ: Some(typ), .. }, .. }, .. @@ -269,7 +295,7 @@ impl UniRecord { Entry::Vacant(vacant_entry) => { vacant_entry.insert(FieldState::Candidate(( ident.pos.unwrap(), - labeled_ty.label.span, + typ.pos.unwrap(), ))); None } @@ -277,11 +303,11 @@ impl UniRecord { } // We don't do anything smart for composite paths: we raise an error right away else { - Some((field_def.pos.unwrap(), labeled_ty.label.span)) + Some((field_def.pos.unwrap(), typ.pos.unwrap())) } } - field => { - if let (Some(ident), Some(_)) = (path_as_ident, &field.value) { + field_def => { + if let (Some(ident), Some(_)) = (path_as_ident, &field_def.value) { candidate_fields.insert(ident.ident(), FieldState::Defined); } @@ -310,21 +336,21 @@ impl UniRecord { } /// Checks if this record qualifies as a record type. If this function - /// returns true, then `into_type_strict()` must succeed. + /// returns `true`, then [Self::into_type_strict] must succeed. pub fn is_record_type(&self) -> bool { self.fields.iter().all(|field_def| { // Field paths with a depth > 1 are not supported in record types. field_def.path.len() == 1 // Warning: this pattern must stay in sync with the // corresponding pattern in `into_type_strict`. - && matches!(&field_def.field, - Field { + && matches!(&field_def, + FieldDef { value: None, metadata: FieldMetadata { doc: None, annotation: - TypeAnnotation { + Annotation { typ: Some(_), contracts, }, @@ -332,50 +358,49 @@ impl UniRecord { not_exported: false, priority: MergePriority::Neutral, }, - // At this stage, this field should always be empty.
It's a run-time thing, and - // is only filled during program transformation. - pending_contracts: _, - } if contracts.is_empty() => Ok(RecordRows(RecordRowsF::Extend { + pos: _, + } => Ok(RecordRows(RecordRowsF::Extend { row: RecordRow { id, - typ: Box::new(labeled_ty.typ), + typ: alloc.type_data(typ.typ, typ.pos), }, - tail: Box::new(tail), + tail: alloc.record_rows(tail.0), })), _ => { Err(InvalidRecordTypeError::InvalidField( @@ -388,7 +413,7 @@ impl UniRecord { // An open record (with an ellipsis `..` at the end) can't be translated to a record type. // `pos_ellipsis` should be set iff `attrs.open` is true. - debug_assert!((self.pos_ellipsis == TermPos::None) != self.attrs.open); + debug_assert!((self.pos_ellipsis == TermPos::None) != self.open); if let Some(raw_span) = self.pos_ellipsis.into_opt() { return Err(InvalidRecordTypeError::IsOpen(raw_span)); @@ -408,33 +433,32 @@ impl UniRecord { self.tail .map(|(tail, _)| tail) .unwrap_or(RecordRows(RecordRowsF::Empty)), - |acc: RecordRows, mut field_def| { + |acc: RecordRows, field_def| { // We don't support compound paths for types, yet. // All positions can be unwrapped because we're still parsing. if field_def.path.len() > 1 { let span = field_def .path - .into_iter() - .map(|path_elem| match path_elem { - FieldPathElem::Ident(id) => id.pos.unwrap(), - FieldPathElem::Expr(rt) => rt.pos.unwrap(), - }) + .iter() + .map(|path_elem| path_elem.pos().unwrap()) .reduce(|acc, span| acc.fuse(span).unwrap_or(acc)) // We already checked that the path is non-empty. .unwrap(); Err(InvalidRecordTypeError::InvalidField(span)) } else { - let elem = field_def.path.pop().unwrap(); + let elem = field_def.path.last().unwrap(); + let id = match elem { - FieldPathElem::Ident(id) => id, + FieldPathElem::Ident(id) => *id, FieldPathElem::Expr(expr) => { - let name = expr.term.as_ref().try_str_chunk_as_static_str().ok_or( + let pos = expr.pos; + let name = expr.node.try_str_chunk_as_static_str().ok_or( InvalidRecordTypeError::InterpolatedField( field_def.pos.unwrap(), ), )?; - LocIdent::new_with_pos(name, expr.pos) + LocIdent::new_with_pos(name, pos) } }; if let Some(prev_id) = fields_seen.insert(id.ident(), id) { @@ -445,7 +469,7 @@ impl UniRecord { }); } - term_to_record_rows(id, field_def, acc) + term_to_record_rows(alloc, id, field_def, acc) } }, )?; @@ -461,74 +485,82 @@ impl UniRecord { } } -impl TryFrom for RichTerm { +impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Ast<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a term. If the `UniRecord` is syntactically a record type or it - /// has a tail, it is first interpreted as a type and then wrapped in a `Term::Types`. One + /// has a tail, it is first interpreted as a type and then wrapped in a `Term::Type`. One /// exception is the empty record, which behaves the same both as a type and a contract, and - /// turning an empty record literal to an opaque function would break everything. + /// turning an empty record literal to an opaque contract would break everything, so the empty + /// record is always interpreted as a term directly. /// - /// Otherwise it is interpreted as a record directly. Fail if the `UniRecord` has a tail but - /// isn't syntactically a record type either. Elaborate field paths `foo.bar = value` to the - /// expanded form `{foo = {bar = value}}`. + /// If the unirecord isn't a record type and doesn't have a tail, it is interpreted as an + /// equivalent record term. Fail if the `UniRecord` has a tail but isn't syntactically a record + /// type either. 
Elaborate field paths `foo.bar = value` to the expanded form `{foo = {bar = + /// value}}`. /// /// We also fix the type variables of the type appearing inside annotations (see in-code /// documentation of the private symbol `FixTypeVars::fix_type_vars`). - fn try_from(ur: UniRecord) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { let pos = ur.pos; // First try to interpret this record as a type. - let result = if ur.tail.is_some() || (ur.is_record_type() && !ur.fields.is_empty()) { + if ur.tail.is_some() || (ur.is_record_type() && !ur.fields.is_empty()) { let tail_span = ur.tail.as_ref().and_then(|t| t.1.into_opt()); // We unwrap all positions: at this stage of the parsing, they must all be set - let mut typ = ur - .into_type_strict() - .map_err(|cause| ParseError::InvalidRecordType { - tail_span, - record_span: pos.unwrap(), - cause, - })?; + let typ = + ur.into_type_strict(alloc) + .map_err(|cause| ParseError::InvalidRecordType { + tail_span, + record_span: pos.unwrap(), + cause, + })?; - typ.fix_type_vars(pos.unwrap())?; - let contract = typ - .contract() - .map_err(|err| ParseError::UnboundTypeVariables(vec![err.0]))?; + let typ = typ.fix_type_vars(alloc, pos.unwrap())?; - Ok(RichTerm::new(Term::Type { typ, contract }, pos)) + Ok(alloc.typ(typ).spanned(pos)) } else { ur.check_typed_field_without_def()?; - let UniRecord { fields, attrs, .. } = ur; - let elaborated = fields + let UniRecord { fields, open, .. } = ur; + + let field_defs_fixed = fields .into_iter() - .map(|mut field_def| { - fix_field_types(&mut field_def.field.metadata, field_def.pos.unwrap())?; - Ok(field_def.elaborate()) + .map(|field_def| { + Ok(FieldDef { + metadata: fix_field_types( + alloc, + field_def.metadata, + field_def.pos.unwrap(), + )?, + ..field_def + }) }) .collect::, _>>()?; - let record_term = RichTerm::from(build_record(elaborated, attrs)); - Ok(record_term) - }; - - result.map(|rt| rt.with_pos(pos)) + Ok(alloc + .record(ast::record::Record { + field_defs: alloc.alloc_iter(field_defs_fixed), + open, + }) + .spanned(pos)) + } } } /// Try to convert a `UniRecord` to a type. The strict part means that the `UniRecord` must be -impl TryFrom for Type { +impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Type<'ast> { type Error = ParseError; /// Convert a `UniRecord` to a type. If the `UniRecord` has a tail, it is interpreted strictly /// as a type and fail if it isn't a plain record type. Otherwise, we first try to interpret it /// as a plain record type, and if that doesn't work, we interpret it as a term and wrap it /// back as a user-defined contract. 
- fn try_from(ur: UniRecord) -> Result { + fn try_convert(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result { let pos = ur.pos; if let Some((_, tail_pos)) = ur.tail { - ur.into_type_strict() + ur.into_type_strict(alloc) .map_err(|cause| ParseError::InvalidRecordType { tail_span: tail_pos.into_opt(), record_span: pos.unwrap(), @@ -536,9 +568,9 @@ impl TryFrom for Type { }) } else { let pos = ur.pos; - ur.clone().into_type_strict().or_else(|_| { - RichTerm::try_from(ur).map(|rt| Type { - typ: TypeF::Contract(rt), + ur.clone().into_type_strict(alloc).or_else(|_| { + Ast::try_convert(alloc, ur).map(|ast| Type { + typ: TypeF::Contract(alloc.ast(ast)), pos, }) }) @@ -616,10 +648,13 @@ impl VarKindCell { } } -pub(super) trait FixTypeVars { +pub(super) trait FixTypeVars<'ast> +where + Self: Sized, +{ /// Post-process a type at the right hand side of an annotation by replacing each unbound type /// variable `TypeF::Var(id)` by a term variable with the same identifier seen as a custom - /// contract `TypeF::Contract(Term::Var(id))`. + /// contract `TypeF::Contract(Node::Var(id))`. /// /// Additionally, this pass determines the kind of a variable introduced by a forall binder. /// @@ -629,8 +664,8 @@ pub(super) trait FixTypeVars { /// variables occurring in types, we often can't know right away if such a variable occurrence /// will eventually be a type variable or a term variable seen as a custom contract. /// - /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to a - /// contract (e.g. in `x | a -> b`) or a type variable (e.g. in `x | forall a b. a -> b`), + /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to + /// a contract (e.g. in `x | a -> b`) or type variables (e.g. in `x | forall a b. a -> b`), /// depending on enclosing `forall`s. To handle both cases, we initially parse all variables /// inside types as type variables. When reaching the right-hand side of an annotation, because /// `forall`s can only bind locally in a type, we can then decide the actual nature of each /// occurrence. We thus turn the variables /// that are not actually bound by a `forall` into term variables. This is the role of /// `fix_type_vars()`. /// - /// Once again because `forall`s only bind variables locally, and don't bind inside contracts, - /// we don't have to recurse into contracts and this pass will only visit each node of the AST - /// at most once in total (and most probably much less so). + /// Since `forall`s only bind type variables locally and don't cross contract boundaries, we + /// don't have to recurse into contracts and this pass will only visit each node of the AST at + /// most once in total (and most probably much less so). In some sense, we just visit the type + /// layer, or type spine, composed only of type constructors. /// /// There is one subtlety with unirecords, though. A unirecord can still be interpreted as a /// record type later. Take the following example: /// /// ```nickel /// # this is inconsistent and will raise a parse error /// forall a. [| 'foo, 'bar; a |] -> {foo : Str, bar: Str; a} /// ``` - fn fix_type_vars(&mut self, span: RawSpan) -> Result<(), ParseError> { - self.fix_type_vars_env(BoundVarEnv::new(), span) + fn fix_type_vars(self, alloc: &'ast AstAlloc, span: RawSpan) -> Result { + Ok(self + .fix_type_vars_env(alloc, BoundVarEnv::new(), span)?
+            .unwrap_or(self))
+    }
+
+    /// Same as [Self::fix_type_vars], but takes `self` as a reference instead, and returns
+    /// `Ok(None)` when `self` hasn't been modified by the type fixing phase or
+    /// `Ok(Some(new_self))` with a modified, owned `self` upon change.
+    fn fix_type_vars_ref(
+        &self,
+        alloc: &'ast AstAlloc,
+        span: RawSpan,
+    ) -> Result<Option<Self>, ParseError> {
+        self.fix_type_vars_env(alloc, BoundVarEnv::new(), span)
     }

     /// Fix type vars in a given environment of variables bound by foralls enclosing this type. The
     /// environment maps bound variables to a reference to the variable kind of the corresponding
     /// forall.
+    ///
+    /// # Ownership
+    ///
+    /// [Self::fix_type_vars_env] might need to be called both on owned data and on immutably
+    /// borrowed data (e.g. [`Type`][crate::bytecode::ast::typ::Type] and [`&'ast
+    /// Type`][crate::bytecode::ast::typ::Type]). We don't want to duplicate the logic of
+    /// [Self::fix_type_vars_env] for both, as we can't write one that is generic enough while
+    /// properly avoiding useless allocations.
+    ///
+    /// The idea of the current API is that even when operating on owned data, `self` is taken by
+    /// reference. If `self` isn't modified by the type fixing phase, then `None` is returned and
+    /// the caller can just reuse the original `self` however they please.
+    ///
+    /// If `self` has been modified by the type fixing phase, then `Some(new_value)` is returned
+    /// with a new owned version of `self`. If the caller needed an owned version, the job is done.
+    /// Otherwise, the caller can use [the ast allocator `alloc`][crate::bytecode::ast::AstAlloc]
+    /// to move the owned data into the allocator and get an `&'ast` reference out of it. The only
+    /// cost is that for owned data, we could have reused the original `self` instead of returning
+    /// a new one, but this is a detail: in practice, only the top-level call of `fix_type_vars` is
+    /// performed on owned data, and the recursive calls are all performed on `&'ast` references.
+    /// At worst, we waste the top-level node, which is stack-allocated anyway.
+    ///
+    /// Because AST nodes are allocated in an arena and are immutable, they won't be reclaimed
+    /// until the whole AST is finally transformed to either the mainline AST or (in the future)
+    /// compiled to bytecode. We want to avoid building useless copies of existing nodes, which is
+    /// the reason behind not using a simpler strategy of just always returning a new value, which
+    /// might be identical to the old one if no type variable has been fixed.
     fn fix_type_vars_env(
-        &mut self,
+        &self,
+        alloc: &'ast AstAlloc,
         bound_vars: BoundVarEnv,
         span: RawSpan,
-    ) -> Result<(), ParseError>;
+    ) -> Result<Option<Self>, ParseError>;
 }

-impl FixTypeVars for Type {
+impl<'ast> FixTypeVars<'ast> for Type<'ast> {
     fn fix_type_vars_env(
-        &mut self,
+        &self,
+        alloc: &'ast AstAlloc,
         mut bound_vars: BoundVarEnv,
         span: RawSpan,
-    ) -> Result<(), ParseError> {
+    ) -> Result<Option<Self>, ParseError> {
+        use crate::bytecode::ast::typ::TypeUnr;
+
+        let pos = self.pos;
+
+        let build_fixed = |new_type: TypeUnr<'ast>| -> Self { Type { typ: new_type, pos } };
+
         match self.typ {
             TypeF::Dyn
             | TypeF::Number
@@ -705,14 +789,23 @@ impl FixTypeVars for Type {
             | TypeF::Contract(_)
             // We don't fix type variables inside a dictionary contract. A dictionary contract
             // should not be considered as a static type, but instead work as a contract. In
-            // particular mustn't be allowed to capture type variables from the enclosing type: see
+            // particular we forbid capturing type variables from the enclosing type: see
             // https://github.com/tweag/nickel/issues/1228.
             | TypeF::Dict { flavour: DictTypeFlavour::Contract, ..}
-            | TypeF::Wildcard(_) => Ok(()),
-            TypeF::Arrow(ref mut s, ref mut t) => {
-                (*s).fix_type_vars_env(bound_vars.clone(), span)?;
-                (*t).fix_type_vars_env(bound_vars, span)?;
-                Ok(())
+            | TypeF::Wildcard(_) => Ok(None),
+            TypeF::Arrow(src, tgt) => {
+                let src_result = src.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+                let tgt_result = tgt.fix_type_vars_env(alloc, bound_vars, span)?;
+
+                if src_result.is_some() || tgt_result.is_some() {
+                    let src = src_result.map(|new_src| alloc.type_move(new_src)).unwrap_or(src);
+                    let tgt = tgt_result.map(|new_tgt| alloc.type_move(new_tgt)).unwrap_or(tgt);
+
+                    Ok(Some(build_fixed(TypeF::Arrow(src, tgt))))
+                }
+                else {
+                    Ok(None)
+                }
             }
             TypeF::Var(sym) => {
                 if let Some(cell) = bound_vars.get(&sym) {
@@ -721,155 +814,275 @@ impl FixTypeVars for Type {
                     cell.try_set(VarKind::Type)
                         .map_err(|_| ParseError::TypeVariableKindMismatch {
                             ty_var: LocIdent::from(sym).with_pos(self.pos),
                             span
                         })?;
+
+                    Ok(None)
                 } else {
                     let id = LocIdent::from(sym).with_pos(self.pos);
-                    self.typ = TypeF::Contract(RichTerm::new(Term::Var(id), id.pos));
+
+                    Ok(Some(build_fixed(TypeF::Contract(alloc.ast(Ast {
+                        node: Node::Var(id),
+                        pos: id.pos,
+                    })))))
                 }
-                Ok(())
             }
             TypeF::Forall {
-                ref var,
-                ref mut var_kind,
-                ref mut body,
+                var,
+                var_kind: ref prev_var_kind,
+                body,
             } => {
-                // We span a new VarKindCell and put it in the environment. The recursive calls to
-                // fix_type_vars will fill this cell with the correct kind, which we get afterwards
-                // to set the right value for `var_kind`.
+                // We spawn a new `VarKindCell` and put it in the environment. The recursive calls
+                // to `fix_type_vars` will fill this cell with the correct kind, which we get
+                // afterwards to set the right value for `var_kind`.
                 bound_vars.insert(var.ident(), VarKindCell::new());

-// let x : forall a. { _foo: forall a. a, bar: { ; a } }
-                (*body).fix_type_vars_env(bound_vars.clone(), span)?;
-                // unwrap(): We just inserted a value for `var` above, and environment can never
+                let body_fixed = body.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+
+                // unwrap(): we just inserted a value for `var` above, and environment can never
                 // delete values.
-                // take_var_kind(): Once we leave the body of this forall, we no longer need
+                // take_var_kind(): once we leave the body of this forall, we no longer need
                 // access to this VarKindCell in bound_vars. We can avoid a clone by taking
                 // the var_kind out. We could also take the whole key value pair out of the
                 // `Environment`, but ownership there is trickier.
-                *var_kind = bound_vars
+                let var_kind = bound_vars
                     .get(&var.ident())
                     .unwrap()
                     .take_var_kind()
                     .unwrap_or_default();

-                Ok(())
+                // By default, the parser sets `var_kind` to `Type`. If the `var_kind` turns out to
+                // actually be `Type`, and the body hasn't changed, we can avoid any cloning and
+                // return `Ok(None)`. Otherwise, we have to build a new `TypeF::Forall`. We still
+                // want to defend against callers that wouldn't follow this convention (that
+                // `prev_var_kind` is necessarily `Type` before fixing), so we still check it.
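The `Arrow` and `Var` arms above are concrete instances of the copy-on-write convention spelled out in the `Ownership` note: take `&self`, return `Ok(None)` when nothing changed, and only rebuild (and arena-allocate) the spine of nodes that actually changed. Here is a stripped-down, self-contained sketch of the same discipline on an invented `Expr` type:

```rust
// Toy model of the `Option`-returning fixing pass: `None` means "unchanged,
// reuse the original", `Some(new)` means "here is the rebuilt node".
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Var(String),
    Neg(Box<Expr>),
}

impl Expr {
    /// Rename the variable `from` to `to`, returning `None` if `self` is unchanged.
    fn fix(&self, from: &str, to: &str) -> Option<Expr> {
        match self {
            Expr::Var(name) if name == from => Some(Expr::Var(to.to_owned())),
            Expr::Var(_) => None,
            // Only rebuild the enclosing node when the child actually changed.
            Expr::Neg(inner) => inner.fix(from, to).map(|fixed| Expr::Neg(Box::new(fixed))),
        }
    }
}

fn main() {
    let e = Expr::Neg(Box::new(Expr::Var("a".into())));
    // Changed: the caller receives an owned, rebuilt value.
    assert_eq!(
        e.fix("a", "b"),
        Some(Expr::Neg(Box::new(Expr::Var("b".into()))))
    );
    // Unchanged: no rebuild, the caller reuses `e` as-is.
    assert!(e.fix("x", "y").is_none());
}
```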
+ if body_fixed.is_some() || !matches!((&var_kind, &prev_var_kind), (&VarKind::Type, &VarKind::Type)) { + let body = body_fixed.map(|body| alloc.alloc(body)).unwrap_or(body); + + Ok(Some(build_fixed(TypeF::Forall { + var, + var_kind, + body, + }))) + } else { + Ok(None) + } } TypeF::Dict { - type_fields: ref mut ty, - flavour: DictTypeFlavour::Type - } | TypeF::Array(ref mut ty) => { - (*ty).fix_type_vars_env(bound_vars, span) + type_fields, + flavour: flavour @ DictTypeFlavour::Type + } => { + Ok(type_fields.fix_type_vars_env(alloc, bound_vars, span)?.map(|ty| { + build_fixed(TypeF::Dict { + type_fields: alloc.type_move(ty), + flavour, + }) + })) + } + TypeF::Array(ty) => { + Ok(ty.fix_type_vars_env(alloc, bound_vars, span)?.map(|ty| + build_fixed(TypeF::Array(alloc.type_move(ty))))) + } + TypeF::Enum(ref erows) => { + Ok(erows.fix_type_vars_env(alloc, bound_vars, span)?.map(|erows| + build_fixed(TypeF::Enum(erows)) + )) + } + TypeF::Record(ref rrows) => { + Ok(rrows.fix_type_vars_env(alloc, bound_vars, span)?.map(|rrows| + build_fixed(TypeF::Record(rrows)) + )) } - TypeF::Enum(ref mut erows) => erows.fix_type_vars_env(bound_vars, span), - TypeF::Record(ref mut rrows) => rrows.fix_type_vars_env(bound_vars, span), } } } -impl FixTypeVars for RecordRows { +impl<'ast> FixTypeVars<'ast> for RecordRows<'ast> { fn fix_type_vars_env( - &mut self, + &self, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, - ) -> Result<(), ParseError> { - fn helper( - rrows: &mut RecordRows, + ) -> Result, ParseError> { + fn do_fix<'ast>( + rrows: &RecordRows<'ast>, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, mut maybe_excluded: HashSet, - ) -> Result<(), ParseError> { + ) -> Result>, ParseError> { match rrows.0 { - RecordRowsF::Empty => Ok(()), - RecordRowsF::TailDyn => Ok(()), + RecordRowsF::Empty | RecordRowsF::TailDyn => Ok(None), // We can't have a contract in tail position, so we don't fix `TailVar`. However, we // have to set the correct kind for the corresponding forall binder. 
- RecordRowsF::TailVar(ref id) => { + RecordRowsF::TailVar(id) => { if let Some(cell) = bound_vars.get(&id.ident()) { cell.try_set(VarKind::RecordRows { excluded: maybe_excluded, }) - .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?; + .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: id, span })?; } - Ok(()) + + Ok(None) } - RecordRowsF::Extend { - ref mut row, - ref mut tail, - } => { + RecordRowsF::Extend { ref row, tail } => { maybe_excluded.insert(row.id.ident()); - row.typ.fix_type_vars_env(bound_vars.clone(), span)?; - helper(tail, bound_vars, span, maybe_excluded) + + let row_fixed = row.fix_type_vars_env(alloc, bound_vars.clone(), span)?; + let tail_fixed = do_fix(tail, alloc, bound_vars, span, maybe_excluded)?; + + if row_fixed.is_some() || tail_fixed.is_some() { + let row = row_fixed.unwrap_or_else(|| row.clone()); + let tail = tail_fixed + .map(|tail_fixed| alloc.record_rows_move(tail_fixed)) + .unwrap_or(tail); + + Ok(Some(RecordRows(RecordRowsF::Extend { row, tail }))) + } else { + Ok(None) + } } } } - helper(self, bound_vars, span, HashSet::new()) + do_fix(self, alloc, bound_vars, span, HashSet::new()) } } -impl FixTypeVars for EnumRows { +impl<'ast> FixTypeVars<'ast> for RecordRow<'ast> { fn fix_type_vars_env( - &mut self, + &self, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, - ) -> Result<(), ParseError> { - fn do_fix( - erows: &mut EnumRows, + ) -> Result, ParseError> { + Ok(self + .typ + .fix_type_vars_env(alloc, bound_vars, span)? + .map(|typ| RecordRow { + id: self.id, + typ: alloc.type_move(typ), + })) + } +} + +impl<'ast> FixTypeVars<'ast> for EnumRows<'ast> { + fn fix_type_vars_env( + &self, + alloc: &'ast AstAlloc, + bound_vars: BoundVarEnv, + span: RawSpan, + ) -> Result, ParseError> { + fn do_fix<'ast>( + erows: &EnumRows<'ast>, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, mut maybe_excluded: HashSet, - ) -> Result<(), ParseError> { + ) -> Result>, ParseError> { match erows.0 { - EnumRowsF::Empty => Ok(()), - // We can't have a contract in tail position, so we don't fix `TailVar`. However, we - // have to set the correct kind for the corresponding forall binder. - EnumRowsF::TailVar(ref id) => { + EnumRowsF::Empty => Ok(None), + // We can't have a contract in tail position, so we don't fix `TailVar` itself. + // However, we have to set the correct kind for the corresponding forall binder. + EnumRowsF::TailVar(id) => { if let Some(cell) = bound_vars.get(&id.ident()) { cell.try_set(VarKind::EnumRows { excluded: maybe_excluded, }) - .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?; + .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: id, span })?; } - Ok(()) + + Ok(None) } - EnumRowsF::Extend { - ref mut row, - ref mut tail, - } => { - if let Some(ref mut typ) = row.typ { - // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them - // for constraints. See the documentation of `typecheck::unif::RowConstrs`. + EnumRowsF::Extend { ref row, tail } => { + // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them + // for constraints. See the documentation of `typecheck::unif::RowConstrs`. 
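The tail-variable arms communicate the inferred kind of a type variable back to its enclosing `forall` through `VarKindCell::try_set`. As a toy model of that write-once behavior (this is only an illustration: the real `VarKindCell` also carries excluded row sets in its kinds):

```rust
// Assumed semantics: first `try_set` wins, a conflicting second `try_set` is an
// error, and `take_var_kind` recovers what was inferred, if anything.
use std::cell::RefCell;

#[derive(Clone, Copy, Debug, PartialEq)]
enum Kind {
    Type,
    EnumRows,
    RecordRows,
}

#[derive(Default)]
struct KindCell(RefCell<Option<Kind>>);

impl KindCell {
    /// Record the kind, or fail if an incompatible kind was already recorded.
    fn try_set(&self, kind: Kind) -> Result<(), ()> {
        let mut slot = self.0.borrow_mut();
        match *slot {
            None => {
                *slot = Some(kind);
                Ok(())
            }
            Some(prev) if prev == kind => Ok(()),
            Some(_) => Err(()),
        }
    }

    fn take_var_kind(&self) -> Option<Kind> {
        self.0.borrow_mut().take()
    }
}

fn main() {
    let cell = KindCell::default();
    assert!(cell.try_set(Kind::EnumRows).is_ok());
    // A later occurrence in record-rows position is a kind mismatch.
    assert!(cell.try_set(Kind::RecordRows).is_err());
    assert_eq!(cell.take_var_kind(), Some(Kind::EnumRows));
}
```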
+                    if row.typ.is_some() {
                         maybe_excluded.insert(row.id.ident());
-                        typ.fix_type_vars_env(bound_vars.clone(), span)?;
                     }
-                    do_fix(tail, bound_vars, span, maybe_excluded)
+
+                    let row_fixed = row.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
+                    let tail_fixed = do_fix(tail, alloc, bound_vars, span, maybe_excluded)?;
+
+                    if row_fixed.is_some() || tail_fixed.is_some() {
+                        let row = row_fixed.unwrap_or_else(|| row.clone());
+                        let tail = tail_fixed
+                            .map(|tail_fixed| alloc.enum_rows_move(tail_fixed))
+                            .unwrap_or(tail);
+
+                        Ok(Some(EnumRows(EnumRowsF::Extend { row, tail })))
+                    } else {
+                        Ok(None)
+                    }
                 }
             }
         }

-        do_fix(self, bound_vars, span, HashSet::new())
+        do_fix(self, alloc, bound_vars, span, HashSet::new())
     }
 }

-/// Fix the type variables of types appearing as annotations of record fields. See the in-code
-/// documentation of the private symbol `Types::fix_type_vars`.
-pub fn fix_field_types(metadata: &mut FieldMetadata, span: RawSpan) -> Result<(), ParseError> {
-    use std::rc::Rc;
-
-    if let Some(LabeledType {
-        typ: ref mut types, ..
-    }) = metadata.annotation.typ
-    {
-        types.fix_type_vars(span)?;
-    }
-
-    for ctr in metadata.annotation.contracts.iter_mut() {
-        ctr.typ.fix_type_vars(span)?;
+impl<'ast> FixTypeVars<'ast> for EnumRow<'ast> {
+    fn fix_type_vars_env(
+        &self,
+        alloc: &'ast AstAlloc,
+        bound_vars: BoundVarEnv,
+        span: RawSpan,
+    ) -> Result<Option<Self>, ParseError> {
+        // `maybe_fixed` is `Some(ty)` if and only if this enum row has an associated
+        // type *and* the type has been changed by fixing.
+        let maybe_fixed = self
+            .typ
+            .as_ref()
+            .map(|ty| {
+                // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them
+                // for constraints. See the documentation of `typecheck::unif::RowConstrs`.
+                ty.fix_type_vars_env(alloc, bound_vars.clone(), span)
+            })
+            .transpose()?
+            .flatten();

-        // Although type variables and term variables are currently printed the same, fixing the
-        // type stored in the label is still better, including to have proper deduplication of
-        // contracts when pretty printing the result of evaluation back.
-        ctr.label.typ = Rc::new(ctr.typ.clone());
+        Ok(maybe_fixed.map(|typ| EnumRow {
+            id: self.id,
+            typ: Some(alloc.type_move(typ)),
+        }))
     }
+}

-    Ok(())
+/// Fix the type variables of types appearing as annotations of record fields. See the in-code
+/// documentation of the private symbol `Types::fix_type_vars`.
+pub fn fix_field_types<'ast>(
+    alloc: &'ast AstAlloc,
+    metadata: FieldMetadata<'ast>,
+    span: RawSpan,
+) -> Result<FieldMetadata<'ast>, ParseError> {
+    use std::borrow::Cow;
+
+    let typ = metadata
+        .annotation
+        .typ
+        .map(|typ| typ.fix_type_vars(alloc, span))
+        .transpose()?;
+
+    let contracts: Result<Vec<Cow<'_, Type<'ast>>>, ParseError> = metadata
+        .annotation
+        .contracts
+        .iter()
+        .map(|ctr| {
+            Ok(ctr
+                .fix_type_vars_ref(alloc, span)?
+                .map(Cow::Owned)
+                .unwrap_or(Cow::Borrowed(ctr)))
+        })
+        .collect();
+    let contracts = contracts?;
+
+    // If none of the contracts have been changed, we can keep the original `[Type]` allocation.
+    let contracts = if contracts.iter().all(|cow| matches!(cow, Cow::Borrowed(_))) {
+        metadata.annotation.contracts
+    } else {
+        alloc.types(contracts.into_iter().map(|cow| cow.into_owned()))
+    };
+
+    Ok(FieldMetadata {
+        annotation: Annotation { typ, contracts },
+        ..metadata
+    })
 }
diff --git a/core/src/parser/utils.rs b/core/src/parser/utils.rs
index 0cedb31a61..946f8029a1 100644
--- a/core/src/parser/utils.rs
+++ b/core/src/parser/utils.rs
@@ -1,45 +1,46 @@
 //!
Various helpers and companion code for the parser are put here to keep the grammar definition //! uncluttered. -use indexmap::map::Entry; -use std::ffi::OsString; -use std::rc::Rc; -use std::{collections::HashSet, fmt::Debug}; - -use self::pattern::bindings::Bindings as _; +use std::{ + ffi::OsString, + iter, + rc::Rc, + {collections::HashSet, fmt::Debug}, +}; use super::error::ParseError; -use crate::cache::InputFormat; use crate::{ - combine::Combine, - eval::{ - merge::{merge_doc, split}, - operation::RecPriority, + app, + bytecode::ast::{ + pattern::bindings::Bindings as _, + record::{FieldDef, FieldMetadata}, + *, }, + cache::InputFormat, + combine::CombineAlloc, + eval::merge::merge_doc, files::FileId, + fun, identifier::LocIdent, - label::{Label, MergeKind, MergeLabel}, - mk_app, mk_fun, + label::Label, position::{RawSpan, TermPos}, - term::pattern::{Pattern, PatternData}, - term::{ - make as mk_term, - record::{Field, FieldMetadata, RecordAttrs, RecordData}, - *, - }, + primop_app, typ::Type, }; -use malachite::num::conversion::traits::{FromSciString, FromStringBase}; +use malachite::{ + num::conversion::traits::{FromSciString, FromStringBase}, + Integer, +}; pub struct ParseNumberError; -pub fn parse_number_sci(slice: &str) -> Result { - Rational::from_sci_string(slice).ok_or(ParseNumberError) +pub fn parse_number_sci(slice: &str) -> Result { + Number::from_sci_string(slice).ok_or(ParseNumberError) } -pub fn parse_number_base(base: u8, slice: &str) -> Result { - Ok(Rational::from( +pub fn parse_number_base(base: u8, slice: &str) -> Result { + Ok(Number::from( Integer::from_string_base(base, slice).ok_or(ParseNumberError)?, )) } @@ -82,20 +83,6 @@ pub enum StringEndDelimiter { Special, } -/// Left hand side of a record field declaration. -#[derive(Clone, Debug)] -pub enum FieldPathElem { - /// A static field declaration: `{ foo = .. }` - Ident(LocIdent), - /// A quoted field declaration: `{ "%{protocol}" = .. }` - /// - /// In practice, the argument must always be `StrChunks`, but since we also need to keep track - /// of the associated span it's handier to just use a `RichTerm`. - Expr(RichTerm), -} - -pub type FieldPath = Vec; - /// A string chunk literal atom, being either a string or a single char. /// /// Because of the way the lexer handles escaping and interpolation, a contiguous static string @@ -106,168 +93,152 @@ pub enum ChunkLiteralPart { Char(char), } -/// A field definition atom. A field is defined by a path, a potential value, and associated -/// metadata. -#[derive(Clone, Debug)] -pub struct FieldDef { - pub path: FieldPath, - pub field: Field, - pub pos: TermPos, -} - -impl FieldDef { - /// Elaborate a record field definition specified as a path, like `a.b.c = foo`, into a regular - /// flat definition `a = {b = {c = foo}}`. - /// - /// # Preconditions - /// - /!\ path must be **non-empty**, otherwise this function panics - pub fn elaborate(self) -> (FieldPathElem, Field) { - let last_ident = self.path.last().and_then(|elem| match elem { - FieldPathElem::Ident(id) => Some(*id), - FieldPathElem::Expr(_) => None, - }); - - let mut it = self.path.into_iter(); - let fst = it.next().unwrap(); - - let content = it - .rev() - .fold(self.field.with_name(last_ident), |acc, path_elem| { - // We first compute a position for the intermediate generated records (it's useful - // in particular for the LSP). The position starts at the subpath corresponding to - // the intermediate record and ends at the final value. 
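The comment above belongs to the now-removed `elaborate` helper, whose job the earlier doc describes: turning a definition at path `a.b.c` into nested single-field records, folding from the right. A compact model of that fold, with plain maps standing in for Nickel records (names and types here are invented for the example):

```rust
// `a.b.c = 1` should elaborate to `a = {b = {c = 1}}`.
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum Val {
    Num(i64),
    Rec(HashMap<String, Val>),
}

fn elaborate(path: &[&str], value: Val) -> (String, Val) {
    let mut it = path.iter();
    let first = it.next().expect("path must be non-empty").to_string();

    // Wrap the value in one record layer per remaining segment, innermost first.
    let content = it.rev().fold(value, |acc, seg| {
        let mut fields = HashMap::new();
        fields.insert(seg.to_string(), acc);
        Val::Rec(fields)
    });

    (first, content)
}

fn main() {
    let (name, v) = elaborate(&["a", "b", "c"], Val::Num(1));
    assert_eq!(name, "a");

    let expected = Val::Rec(HashMap::from([(
        "b".to_string(),
        Val::Rec(HashMap::from([("c".to_string(), Val::Num(1))])),
    )]));
    assert_eq!(v, expected);
}
```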
- // - // unwrap is safe here becuase the initial content has a position, and we make sure - // we assign a position for the next field. - let pos = match path_elem { - FieldPathElem::Ident(id) => id.pos, - FieldPathElem::Expr(ref expr) => expr.pos, - }; - // unwrap is safe here because every id should have a non-`TermPos::None` position - let id_span = pos.unwrap(); - let acc_span = acc - .value - .as_ref() - .map(|value| value.pos.unwrap()) - .unwrap_or(id_span); - - // `RawSpan::fuse` only returns `None` when the two spans are in different files. - // A record field and its value *must* be in the same file, so this is safe. - let pos = TermPos::Original(id_span.fuse(acc_span).unwrap()); - - match path_elem { - FieldPathElem::Ident(id) => { - let mut fields = IndexMap::new(); - fields.insert(id, acc); - Field::from(RichTerm::new( - Term::Record(RecordData { - fields, - ..Default::default() - }), - pos, - )) - } - FieldPathElem::Expr(exp) => { - let static_access = exp.term.as_ref().try_str_chunk_as_static_str(); - - if let Some(static_access) = static_access { - let id = LocIdent::new_with_pos(static_access, exp.pos); - let mut fields = IndexMap::new(); - fields.insert(id, acc); - Field::from(RichTerm::new( - Term::Record(RecordData { - fields, - ..Default::default() - }), - pos, - )) - } else { - // The record we create isn't recursive, because it is only comprised of - // one dynamic field. It's just simpler to use the infrastructure of - // `RecRecord` to handle dynamic fields at evaluation time rather than - // right here - Field::from(RichTerm::new( - Term::RecRecord(RecordData::empty(), vec![(exp, acc)], None), - pos, - )) - } - } - } - }); - - (fst, content) - } - - /// Returns the identifier corresponding to this definition if the path is composed of exactly - /// one element which is a static identifier. Returns `None` otherwise. - pub fn path_as_ident(&self) -> Option { - if self.path.len() > 1 { - return None; - } - - self.path.first().and_then(|path_elem| match path_elem { - FieldPathElem::Expr(_) => None, - FieldPathElem::Ident(ident) => Some(*ident), - }) - } -} - /// The last field of a record, that can either be a normal field declaration or an ellipsis. #[derive(Clone, Debug)] -pub enum RecordLastField { - Field(FieldDef), +pub enum RecordLastField<'ast> { + Field(FieldDef<'ast>), Ellipsis, } -/// A single binding in a let block. -#[derive(Clone, Debug)] -pub struct LetBinding { - pub pattern: Pattern, - pub annot: Option, - pub value: RichTerm, +/// The last match in a data structure pattern. This can either be a normal match, or an ellipsis +/// which can capture the rest of the data structure. The type parameter `P` is the type of the +/// pattern of the data structure (ellipsis are supported for both array and record patterns). +/// +/// # Example +/// +/// - In `{foo={}, bar}`, the last match is an normal match. +/// - In `{foo={}, bar, ..}`, the last match is a non-capturing ellipsis. +/// - In `{foo={}, bar, ..rest}`, the last match is a capturing ellipsis. +#[derive(Debug, PartialEq, Clone)] +pub enum LastPattern
<P>
{
+    /// The last field is a normal match. In this case the pattern is "closed" so every record
+    /// field should be matched.
+    Normal(P),
+    /// The pattern is "open" `, ..}`. Optionally you can bind a record containing the remaining
+    /// fields to an `Identifier` using the syntax `, ..y}`.
+    Ellipsis(Option<LocIdent>),
+}
+
+/// Trait for operators that can be eta-expanded to a function.
+pub(super) trait EtaExpand {
+    /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2
+    /// => x1 == x2`. Propagate the position of the curried operator to the generated primop apps
+    /// for better error reporting.
+    fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_>;
+}

 /// An infix operator that is not applied. Used for the curried operator syntax (e.g. `(==)`)
-pub enum InfixOp {
-    Unary(UnaryOp),
-    Binary(BinaryOp),
-}
+pub(super) struct InfixOp(pub(super) primop::PrimOp);

-impl From for InfixOp {
-    fn from(op: UnaryOp) -> Self {
-        InfixOp::Unary(op)
+impl EtaExpand for InfixOp {
+    fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_> {
+        // We could use `LocIdent::fresh` for the newly introduced function parameters. However,
+        // it has the issue that pretty printing them doesn't result in valid Nickel anymore. This
+        // is why we prefer normal identifiers like `x` or `y`.
+        match self {
+            // We treat `UnaryOp::BoolAnd` and `UnaryOp::BoolOr` separately.
+            //
+            // They are unary operators taking a second lazy argument, but the current mainline
+            // evaluator expects that they are always fully applied (including to their argument).
+            // That is, Nickel currently doesn't support a partial application like `%bool_or%
+            // <arg>` (which is fine, because the latter isn't actually representable in the
+            // source language: `BoolOr` is only expressible through the infix syntax `<exp1> ||
+            // <exp2>`). Thus, instead of eta-expanding to `fun x => <op> x` as we would for other
+            // unary operators, we eta-expand to `fun x1 x2 => <op> x1 x2`.
+            InfixOp(op @ primop::PrimOp::BoolAnd) | InfixOp(op @ primop::PrimOp::BoolOr) => {
+                let fst_arg = LocIdent::from("x");
+                let snd_arg = LocIdent::from("y");
+
+                fun!(
+                    alloc,
+                    fst_arg,
+                    snd_arg,
+                    app!(
+                        alloc,
+                        primop_app!(alloc, op, builder::var(fst_arg)),
+                        builder::var(snd_arg),
+                    )
+                    .with_pos(pos),
+                )
+                .node
+            }
+            // `RecordGet field record` corresponds to `record."%{field}"`. The curried version
+            // `(.)` thus has its arguments reversed compared to the `RecordGet` primop, so we
+            // need to flip them.
+            InfixOp(op @ primop::PrimOp::RecordGet) => {
+                let fst_arg = LocIdent::new("x");
+                let snd_arg = LocIdent::new("y");
+
+                fun!(
+                    alloc,
+                    fst_arg,
+                    snd_arg,
+                    primop_app!(alloc, op, builder::var(snd_arg), builder::var(fst_arg))
+                        .with_pos(pos),
+                )
+                .node
+            }
+            InfixOp(op) => {
+                let vars: Vec<_> = (0..op.arity())
+                    .map(|i| LocIdent::from(format!("x{i}")))
+                    .collect();
+                let fun_args: Vec<_> = vars.iter().map(|arg| pattern::Pattern::any(*arg)).collect();
+                let args: Vec<_> = vars.into_iter().map(builder::var).collect();
+
+                alloc.nary_fun(fun_args, alloc.prim_op(op, args).spanned(pos))
+            }
+        }
     }
 }

-impl From for InfixOp {
-    fn from(op: BinaryOp) -> Self {
-        InfixOp::Binary(op)
-    }
+/// Additional infix operators that aren't proper primitive operations in the Nickel AST but are
+/// still available in the surface syntax (and desugared at parsing time). They can still be used
+/// in a curried form so they need a wrapper and an `EtaExpand` implementation.
+pub(super) enum ExtendedInfixOp { + /// The reverse application operation or pipe operator `|>`. + ReverseApp, + /// The inequality operator `!=`. + NotEqual, } -impl InfixOp { - /// Eta-expand an operator. This wraps an operator, for example `==`, as a function `fun x1 x2 - /// => x1 == x2`. Propagate the given position to the function body, for better error - /// reporting. - pub fn eta_expand(self, pos: TermPos) -> RichTerm { - let pos = pos.into_inherited(); +impl EtaExpand for ExtendedInfixOp { + fn eta_expand(self, alloc: &AstAlloc, pos: TermPos) -> Node<'_> { match self { - // We treat `UnaryOp::BoolAnd` and `UnaryOp::BoolOr` separately. - // They should morally be binary operators, but we represent them as unary - // operators internally so that their second argument is evaluated lazily. - InfixOp::Unary(op @ UnaryOp::BoolAnd) | InfixOp::Unary(op @ UnaryOp::BoolOr) => { - mk_fun!( - "x1", - "x2", - mk_app!(mk_term::op1(op, mk_term::var("x1")), mk_term::var("x2")).with_pos(pos) + ExtendedInfixOp::ReverseApp => { + let fst_arg = LocIdent::from("x"); + let snd_arg = LocIdent::from("y"); + + fun!( + alloc, + fst_arg, + snd_arg, + app!(alloc, builder::var(snd_arg), builder::var(fst_arg)).with_pos(pos), ) + .node + } + ExtendedInfixOp::NotEqual => { + let fst_arg = LocIdent::from("x"); + let snd_arg = LocIdent::from("y"); + + fun!( + alloc, + fst_arg, + snd_arg, + primop_app!( + alloc, + primop::PrimOp::BoolNot, + primop_app!( + alloc, + primop::PrimOp::Eq, + builder::var(fst_arg), + builder::var(snd_arg), + ) + .with_pos(pos), + ) + .with_pos(pos), + ) + .node } - InfixOp::Unary(op) => mk_fun!("x", mk_term::op1(op, mk_term::var("x")).with_pos(pos)), - InfixOp::Binary(op) => mk_fun!( - "x1", - "x2", - mk_term::op2(op, mk_term::var("x1"), mk_term::var("x2")).with_pos(pos) - ), } } } @@ -275,28 +246,14 @@ impl InfixOp { /// Trait for structures representing annotations which can be combined with a term to build /// another term, or another structure holding a term, such as a field. `T` is the said target /// structure. -pub trait AttachTerm { - fn attach_term(self, rt: RichTerm) -> T; +pub trait AttachToAst<'ast, T> { + fn attach_to_ast(self, alloc: &'ast AstAlloc, ast: Ast<'ast>) -> T; } -impl Combine for Option { - fn combine(left: Self, right: Self) -> Self { - match (left, right) { - (None, None) => None, - (None, Some(x)) | (Some(x), None) => Some(x), - (Some(left), Some(right)) => Some(Combine::combine(left, right)), - } - } -} - -impl Combine for FieldMetadata { +impl<'ast> CombineAlloc<'ast> for FieldMetadata<'ast> { /// Combine two field metadata into one. If data that can't be combined (typically, the /// documentation or the type annotation) are set by both, the left one's are kept. - /// - /// Note that no environment management operation such as closurization of contracts takes - /// place, because this function is expected to be used on the AST before the evaluation (in - /// the parser or during program transformation). - fn combine(left: Self, right: Self) -> Self { + fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { let priority = match (left.priority, right.priority) { // Neutral corresponds to the case where no priority was specified. In that case, the // other priority takes precedence. 
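For a concrete picture of what `EtaExpand` produces for the extended operators: `(!=)` desugars to a two-argument function negating an equality test. The miniature AST below is invented for the example; the real implementation assembles `Node`s with the `fun!`, `app!` and `primop_app!` macros shown earlier in this diff.

```rust
// Toy AST: just enough structure to spell out `fun x y => !(x == y)`.
#[derive(Debug)]
enum Toy {
    Var(&'static str),
    Fun(&'static str, Box<Toy>),
    Eq(Box<Toy>, Box<Toy>),
    Not(Box<Toy>),
}

fn eta_expand_not_equal() -> Toy {
    // fun x => fun y => !(x == y)
    Toy::Fun(
        "x",
        Box::new(Toy::Fun(
            "y",
            Box::new(Toy::Not(Box::new(Toy::Eq(
                Box::new(Toy::Var("x")),
                Box::new(Toy::Var("y")),
            )))),
        )),
    )
}

fn main() {
    println!("{:?}", eta_expand_not_equal());
}
```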
@@ -308,7 +265,7 @@ impl Combine for FieldMetadata { FieldMetadata { doc: merge_doc(left.doc, right.doc), - annotation: Combine::combine(left.annotation, right.annotation), + annotation: CombineAlloc::combine(alloc, left.annotation, right.annotation), opt: left.opt || right.opt, // The resulting field will be suppressed from serialization if either of the fields to be merged is. not_exported: left.not_exported || right.not_exported, @@ -317,285 +274,68 @@ impl Combine for FieldMetadata { } } -impl AttachTerm for FieldMetadata { - fn attach_term(self, rt: RichTerm) -> Field { - Field { - value: Some(rt), - metadata: self, - pending_contracts: Default::default(), - } - } -} - -impl Combine for LetMetadata { - // Combine two let metadata into one. If `doc` is set by both, the left one's documentation - // is kept. - fn combine(left: Self, right: Self) -> Self { +impl<'ast> CombineAlloc<'ast> for LetMetadata<'ast> { + /// Combine two let metadata into one. Same as `FieldMetadata::combine` but restricted to the + /// metadata that can be associated to a let block. + fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { LetMetadata { - doc: left.doc.or(right.doc), - annotation: Combine::combine(left.annotation, right.annotation), + doc: merge_doc(left.doc, right.doc), + annotation: CombineAlloc::combine(alloc, left.annotation, right.annotation), } } } -impl Combine for TypeAnnotation { - /// Combine two type annotations. If both have `types` set, the final type +impl<'ast> CombineAlloc<'ast> for Annotation<'ast> { + /// Combine two annotations. If both have `types` set, the final type /// is the one of the left annotation, while the right one's type is put /// inside the final `contracts`. /// /// Contracts are combined from left to right; the left one's are put first, /// then maybe the right one's type annotation and then the right one's /// contracts. - fn combine(left: Self, right: Self) -> Self { + fn combine(alloc: &'ast AstAlloc, left: Self, right: Self) -> Self { let (typ, leftover) = match (left.typ, right.typ) { (left_ty @ Some(_), right_ty @ Some(_)) => (left_ty, right_ty), (left_ty, right_ty) => (left_ty.or(right_ty), None), }; - let contracts = left + let contracts: Vec<_> = left .contracts - .into_iter() + .iter() + .cloned() .chain(leftover) - .chain(right.contracts) + .chain(right.contracts.iter().cloned()) .collect(); - TypeAnnotation { typ, contracts } + alloc.annotation(typ, contracts) } } -impl AttachTerm for TypeAnnotation { - fn attach_term(self, rt: RichTerm) -> RichTerm { +impl<'ast> AttachToAst<'ast, Ast<'ast>> for Annotation<'ast> { + fn attach_to_ast(self, alloc: &'ast AstAlloc, ast: Ast<'ast>) -> Ast<'ast> { if self.is_empty() { - return rt; - } - - let pos = rt.pos; - RichTerm::new(Term::Annotated(self, rt), pos) - } -} - -/// Some constructs are introduced with the metadata pipe operator `|`, but aren't metadata per se -/// (ex: `rec force`/`rec default`). Those are collected in this extended annotation and then -/// desugared into standard metadata. -#[derive(Clone, Debug, Default)] -pub struct FieldExtAnnot { - /// Standard metadata. 
- pub metadata: FieldMetadata, - /// Presence of an annotation `push force` - pub rec_force: bool, - /// Presence of an annotation `push default` - pub rec_default: bool, -} - -impl FieldExtAnnot { - pub fn new() -> Self { - Default::default() - } -} - -impl AttachTerm for FieldExtAnnot { - fn attach_term(self, value: RichTerm) -> Field { - let value = if self.rec_force || self.rec_default { - let rec_prio = if self.rec_force { - RecPriority::Top - } else { - RecPriority::Bottom - }; - - let pos = value.pos; - Some(rec_prio.apply_rec_prio_op(value).with_pos(pos)) - } else { - Some(value) - }; - - Field { - value, - metadata: self.metadata, - pending_contracts: Default::default(), - } - } -} - -impl Combine for FieldExtAnnot { - fn combine(left: Self, right: Self) -> Self { - let metadata = FieldMetadata::combine(left.metadata, right.metadata); - let rec_force = left.rec_force || right.rec_force; - let rec_default = left.rec_default || right.rec_default; - - FieldExtAnnot { - metadata, - rec_force, - rec_default, + return ast; } - } -} -impl From for FieldExtAnnot { - fn from(metadata: FieldMetadata) -> Self { - FieldExtAnnot { - metadata, - ..Default::default() + let pos = ast.pos; + Ast { + node: alloc.annotated(self, ast), + pos, } } } -/// Turn dynamic accesses using literal chunks only into static accesses -pub fn mk_access(access: RichTerm, root: RichTerm) -> RichTerm { - if let Some(label) = access.as_ref().try_str_chunk_as_static_str() { - mk_term::op1( - UnaryOp::RecordAccess(LocIdent::new_with_pos(label, access.pos)), - root, +/// Takes a record access written as `foo.""`, and either turn it into a static access +/// whenever possible (when `` is a static string without interpolation), or into a dynamic +/// `%record/get%` access otherwise. +pub fn mk_access<'ast>(alloc: &'ast AstAlloc, access: Ast<'ast>, root: Ast<'ast>) -> Node<'ast> { + if let Some(label) = access.node.try_str_chunk_as_static_str() { + alloc.prim_op( + primop::PrimOp::RecordStatAccess(LocIdent::new_with_pos(label, access.pos)), + iter::once(root), ) } else { - mk_term::op2(BinaryOp::RecordGet, access, root) - } -} - -/// Build a record from a list of field definitions. If a field is defined several times, the -/// different definitions are merged. -pub fn build_record(fields: I, attrs: RecordAttrs) -> Term -where - I: IntoIterator + Debug, -{ - let mut static_fields = IndexMap::new(); - let mut dynamic_fields = Vec::new(); - - fn insert_static_field( - static_fields: &mut IndexMap, - id: LocIdent, - field: Field, - ) { - match static_fields.entry(id) { - Entry::Occupied(mut occpd) => { - // temporarily putting an empty field in the entry to take the previous value. - let prev = occpd.insert(Field::default()); - - // unwrap(): the field's identifier must have a position during parsing. - occpd.insert(merge_fields(id.pos.unwrap(), prev, field)); - } - Entry::Vacant(vac) => { - vac.insert(field); - } - } - } - - fields.into_iter().for_each(|field| match field { - (FieldPathElem::Ident(id), t) => insert_static_field(&mut static_fields, id, t), - (FieldPathElem::Expr(e), t) => { - // Dynamic fields (whose name is defined by an interpolated string) have a different - // semantics than fields whose name can be determined statically. 
However, static - // fields with special characters are also parsed as string chunks: - // - // ``` - // let x = "dynamic" in {"I%am.static" = false, "%{x}" = true} - // ``` - // - // Here, both fields are parsed as `StrChunks`, but the first field is actually a - // static one, just with special characters. The following code determines which fields - // are actually static or not, and inserts them in the right location. - let static_access = e.term.as_ref().try_str_chunk_as_static_str(); - - if let Some(static_access) = static_access { - insert_static_field( - &mut static_fields, - LocIdent::new_with_pos(static_access, e.pos), - t, - ) - } else { - dynamic_fields.push((e, t)); - } - } - }); - - Term::RecRecord( - RecordData::new(static_fields, attrs, None), - dynamic_fields, - None, - ) -} - -/// Merge two fields by performing the merge of both their value (dynamically if -/// necessary, by introducing a merge operator) and their metadata (statically). -/// -/// If the values of both fields are static records ([`Term::Record`]s), their -/// merge is computed statically. This prevents building terms whose depth is -/// linear in the number of fields if partial definitions are involved. This -/// manifested in https://github.com/tweag/nickel/issues/1427. -fn merge_fields(id_span: RawSpan, field1: Field, field2: Field) -> Field { - // FIXME: We're duplicating a lot of the logic in - // [`eval::merge::merge_fields`] but not quite enough to actually factor - // it out - fn merge_values(id_span: RawSpan, t1: RichTerm, t2: RichTerm) -> RichTerm { - let RichTerm { - term: t1, - pos: pos1, - } = t1; - let RichTerm { - term: t2, - pos: pos2, - } = t2; - match (t1.into_owned(), t2.into_owned()) { - (Term::Record(rd1), Term::Record(rd2)) => { - let split::SplitResult { - left, - center, - right, - } = split::split(rd1.fields, rd2.fields); - let mut fields = IndexMap::with_capacity(left.len() + center.len() + right.len()); - fields.extend(left); - fields.extend(right); - for (id, (field1, field2)) in center.into_iter() { - fields.insert(id, merge_fields(id_span, field1, field2)); - } - Term::Record(RecordData::new( - fields, - RecordAttrs::combine(rd1.attrs, rd2.attrs), - None, - )) - .into() - } - (t1, t2) => mk_term::op2( - BinaryOp::Merge(MergeLabel { - span: id_span, - kind: MergeKind::PiecewiseDef, - }), - RichTerm::new(t1, pos1), - RichTerm::new(t2, pos2), - ), - } - } - - let (value, priority) = match (field1.value, field2.value) { - (Some(t1), Some(t2)) if field1.metadata.priority == field2.metadata.priority => ( - Some(merge_values(id_span, t1, t2)), - field1.metadata.priority, - ), - (Some(t), _) if field1.metadata.priority > field2.metadata.priority => { - (Some(t), field1.metadata.priority) - } - (_, Some(t)) if field1.metadata.priority < field2.metadata.priority => { - (Some(t), field2.metadata.priority) - } - (Some(t), None) => (Some(t), field1.metadata.priority), - (None, Some(t)) => (Some(t), field2.metadata.priority), - (None, None) => (None, Default::default()), - _ => unreachable!(), - }; - - // At this stage, pending contracts aren't filled nor meaningful, and should all be empty. 
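The comment above explains when a field name written as string chunks can be demoted to a static name. The test is the same fold that `StrChunk::try_chunks_as_static_str` performs in `core/src/term/mod.rs` later in this diff: a name is static exactly when every chunk is a literal. A self-contained sketch, with a stand-in `Chunk` type:

```rust
// Fold literal chunks into a single name, or bail out on interpolation.
enum Chunk {
    Literal(String),
    // An interpolated expression `%{...}`; its payload is irrelevant here.
    Expr,
}

fn as_static_name(chunks: &[Chunk]) -> Option<String> {
    chunks.iter().try_fold(String::new(), |mut acc, chunk| match chunk {
        Chunk::Literal(lit) => {
            acc.push_str(lit);
            Some(acc)
        }
        Chunk::Expr => None,
    })
}

fn main() {
    // `"I%am.static"` parses as literal chunks only: a static field after all.
    let static_name = vec![Chunk::Literal("I%am.static".to_string())];
    assert_eq!(as_static_name(&static_name), Some("I%am.static".to_string()));

    // `"%{x}"` contains an interpolated expression: the field stays dynamic.
    let dynamic_name = vec![Chunk::Expr];
    assert_eq!(as_static_name(&dynamic_name), None);
}
```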
-        debug_assert!(field1.pending_contracts.is_empty() && field2.pending_contracts.is_empty());
     Field {
         value,
         // [`FieldMetadata::combine`] produces subtly different behaviour from
         // the runtime merging code, which is what we need to replicate here
         metadata: FieldMetadata {
             doc: merge_doc(field1.metadata.doc, field2.metadata.doc),
             annotation: Combine::combine(field1.metadata.annotation, field2.metadata.annotation),
             opt: field1.metadata.opt && field2.metadata.opt,
             not_exported: field1.metadata.not_exported || field2.metadata.not_exported,
             priority,
         },
         pending_contracts: Vec::new(),
@@ -621,86 +361,42 @@ pub fn mk_label(typ: Type, src_id: FileId, l: usize, r: usize) -> Label {
     }
 }

-/// Same as `mk_span`, but for merge labels. The kind is set to the default one
-/// (`MergeKind::Standard`).
-pub fn mk_merge_label(src_id: FileId, l: usize, r: usize) -> MergeLabel {
-    MergeLabel {
-        span: mk_span(src_id, l, r),
-        kind: Default::default(),
-    }
-}
-
-/// Generate a `Let` or a `LetPattern` (depending on whether there's a binding
-/// with a record pattern) from the parsing of a let definition.
-pub fn mk_let(
+/// Checks that there are no duplicate bindings in a let block (when bindings are simple, that
+/// is, they aren't patterns), and builds the corresponding let block node if the check passes.
+pub fn mk_let<'ast>(
+    alloc: &'ast AstAlloc,
     rec: bool,
-    bindings: Vec<LetBinding>,
-    body: RichTerm,
-) -> Result<RichTerm, ParseError> {
-    let all_simple = bindings
-        .iter()
-        .all(|b| matches!(b.pattern.data, PatternData::Any(_)));
-
+    bindings: Vec<LetBinding<'ast>>,
+    body: Ast<'ast>,
+) -> Result<Ast<'ast>, ParseError> {
     // Check for duplicate names across the different bindings. We
     // don't check for duplicate names within a single binding because
     // there are backwards-compatibility constraints (e.g., see
     // `RecordPattern::check_dup`).
     let mut seen_bindings: HashSet<LocIdent> = HashSet::new();
+
     for b in &bindings {
         let new_bindings = b.pattern.bindings();
-        for (_path, id, _field) in &new_bindings {
-            if let Some(old) = seen_bindings.get(id) {
+        for binding in &new_bindings {
+            if let Some(old) = seen_bindings.get(&binding.id) {
                 return Err(ParseError::DuplicateIdentInLetBlock {
-                    ident: *id,
+                    ident: binding.id,
                     prev_ident: *old,
                 });
             }
         }

-        seen_bindings.extend(new_bindings.into_iter().map(|(_path, id, _field)| id));
+        seen_bindings.extend(new_bindings.into_iter().map(|binding| binding.id));
     }

-    if all_simple {
-        Ok(mk_term::let_in(
-            rec,
-            bindings.into_iter().map(|mut b| {
-                let PatternData::Any(id) = b.pattern.data else {
-                    // unreachable: we checked for `all_simple`, meaning that
-                    // all bindings are just Any(_).
-                    unreachable!()
-                };
-                if let Some(ann) = b.annot {
-                    b.value = ann.annotation.attach_term(b.value);
-                }
-                (id, b.value)
-            }),
-            body,
-        ))
-    } else {
-        Ok(mk_term::let_pat_in(
-            rec,
-            bindings.into_iter().map(|mut b| {
-                if let Some(ann) = b.annot {
-                    b.value = ann.annotation.attach_term(b.value);
-                }
-                (b.pattern, b.value)
-            }),
-            body,
-        ))
-    }
+    Ok(alloc.let_block(bindings, body, rec))
 }

-/// Generate a `Fun` (when the pattern is trivial) or a `FunPattern` from the parsing of a function
-/// definition. This function panics if the definition somehow has neither an `Ident` nor a
-/// non-`Empty` `Destruct` pattern.
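The duplicate check in `mk_let` above boils down to inserting every bound identifier into a set and rejecting the first repeat, so that a block like `let x = 1, x = 2 in x` is a parse error. A reduced model, with plain strings standing in for identifiers:

```rust
use std::collections::HashSet;

fn check_no_duplicates<'a>(bound: impl IntoIterator<Item = &'a str>) -> Result<(), String> {
    let mut seen: HashSet<&str> = HashSet::new();

    for id in bound {
        // `insert` returns false when the identifier was already present.
        if !seen.insert(id) {
            return Err(format!("duplicate binding `{id}` in let block"));
        }
    }

    Ok(())
}

fn main() {
    assert!(check_no_duplicates(["x", "y"]).is_ok());
    assert!(check_no_duplicates(["x", "x"]).is_err());
}
```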
-pub fn mk_fun(pat: Pattern, body: RichTerm) -> Term { - match pat.data { - PatternData::Any(id) => Term::Fun(id, body), - _ => Term::FunPattern(pat, body), - } -} - -pub fn mk_import_based_on_filename(path: String, _span: RawSpan) -> Result { +pub fn mk_import_based_on_filename( + alloc: &AstAlloc, + path: String, + _span: RawSpan, +) -> Result, ParseError> { let path = OsString::from(path); let format: Option = InputFormat::from_path(std::path::Path::new(path.as_os_str())); @@ -708,19 +404,21 @@ pub fn mk_import_based_on_filename(path: String, _span: RawSpan) -> Result Result { +) -> Result, ParseError> { let path = OsString::from(path); let Some(format) = InputFormat::from_tag(format.label()) else { return Err(ParseError::InvalidImportFormat { span }); }; - Ok(Term::Import(Import::Path { path, format })) + + Ok(alloc.import_path(path, format)) } /// Determine the minimal level of indentation of a multi-line string. @@ -729,21 +427,21 @@ pub fn mk_import_explicit( /// indentation level of a line is the number of consecutive whitespace characters, which are /// either a space or a tab, counted from the beginning of the line. If a line is empty or consist /// only of whitespace characters, it is ignored. -pub fn min_indent(chunks: &[StrChunk]) -> usize { +pub fn min_indent(chunks: &[StringChunk>]) -> usize { let mut min: usize = usize::MAX; let mut current = 0; let mut start_line = true; for chunk in chunks.iter() { match chunk { - StrChunk::Expr(_, _) if start_line => { + StringChunk::Expr(_, _) if start_line => { if current < min { min = current; } start_line = false; } - StrChunk::Expr(_, _) => (), - StrChunk::Literal(s) => { + StringChunk::Expr(_, _) => (), + StringChunk::Literal(s) => { for c in s.chars() { match c { ' ' | '\t' if start_line => current += 1, @@ -822,12 +520,12 @@ pub fn min_indent(chunks: &[StrChunk]) -> usize { ///not sth /// end" /// ``` -pub fn strip_indent(mut chunks: Vec>) -> Vec> { +pub fn strip_indent(chunks: &mut [StringChunk>]) { if chunks.is_empty() { - return chunks; + return; } - let min = min_indent(&chunks); + let min = min_indent(chunks); let mut current = 0; let mut start_line = true; let chunks_len = chunks.len(); @@ -856,7 +554,7 @@ pub fn strip_indent(mut chunks: Vec>) -> Vec { + StringChunk::Literal(ref mut s) => { let mut buffer = String::new(); for c in s.chars() { match c { @@ -907,7 +605,7 @@ pub fn strip_indent(mut chunks: Vec>) -> Vec { + StringChunk::Expr(_, ref mut indent) => { if start_line { debug_assert!(current >= min); debug_assert!(expr_on_line.is_none()); @@ -923,19 +621,22 @@ pub fn strip_indent(mut chunks: Vec>) -> Vec *indent = 0, - _ => panic!(), + Some(StringChunk::Expr(_, ref mut indent)) => *indent = 0, + _ => unreachable!( + "all elements in `unindent` should be expressions, but found a literal" + ), } } - - chunks } #[cfg(test)] mod tests { - use crate::typ::TypeF; - - use super::*; + use crate::{ + combine::Combine, + label::Label, + term::{LabeledType, TypeAnnotation}, + typ::{Type, TypeF}, + }; #[test] fn contract_annotation_order() { diff --git a/core/src/pretty.rs b/core/src/pretty.rs index 7a04b1e3c6..cc0b316801 100644 --- a/core/src/pretty.rs +++ b/core/src/pretty.rs @@ -1384,7 +1384,7 @@ mod tests { use crate::parser::lexer::Lexer; use crate::parser::{ grammar::{FixedTypeParser, TermParser}, - ErrorTolerantParser, + ErrorTolerantParserCompat, }; use pretty::Doc; @@ -1396,7 +1396,7 @@ mod tests { let id = Files::new().add("", s); FixedTypeParser::new() - .parse_strict(id, Lexer::new(s)) + .parse_strict_compat(id, 
Lexer::new(s)) .unwrap() } @@ -1404,7 +1404,9 @@ mod tests { fn parse_term(s: &str) -> RichTerm { let id = Files::new().add("", s); - TermParser::new().parse_strict(id, Lexer::new(s)).unwrap() + TermParser::new() + .parse_strict_compat(id, Lexer::new(s)) + .unwrap() } /// Parse a string representation `long` of a type, and assert that diff --git a/core/src/program.rs b/core/src/program.rs index d59ecf32a2..f685c92ef9 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -67,14 +67,16 @@ impl FieldPath { /// Indeed, there's no such thing as a valid empty field path (at least from the parsing point /// of view): if `input` is empty, or consists only of spaces, `parse` returns a parse error. pub fn parse(cache: &mut Cache, input: String) -> Result { - use crate::parser::{grammar::StaticFieldPathParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{ + grammar::StaticFieldPathParser, lexer::Lexer, ErrorTolerantParserCompat, + }; let input_id = cache.replace_string(SourcePath::Query, input); let s = cache.source(input_id); let parser = StaticFieldPathParser::new(); let field_path = parser - .parse_strict(input_id, Lexer::new(s)) + .parse_strict_compat(input_id, Lexer::new(s)) // We just need to report an error here .map_err(|mut errs| { errs.errors.pop().expect( @@ -140,14 +142,16 @@ impl FieldOverride { assignment: String, priority: MergePriority, ) -> Result { - use crate::parser::{grammar::CliFieldAssignmentParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{ + grammar::CliFieldAssignmentParser, lexer::Lexer, ErrorTolerantParserCompat, + }; let input_id = cache.replace_string(SourcePath::CliFieldAssignment, assignment); let s = cache.source(input_id); let parser = CliFieldAssignmentParser::new(); let (path, _, span_value) = parser - .parse_strict(input_id, Lexer::new(s)) + .parse_strict_compat(input_id, Lexer::new(s)) // We just need to report an error here .map_err(|mut errs| { errs.errors.pop().expect( diff --git a/core/src/repl/mod.rs b/core/src/repl/mod.rs index 3789792d39..03998d024b 100644 --- a/core/src/repl/mod.rs +++ b/core/src/repl/mod.rs @@ -16,7 +16,7 @@ use crate::eval::cache::Cache as EvalCache; use crate::eval::{Closure, VirtualMachine}; use crate::files::FileId; use crate::identifier::LocIdent; -use crate::parser::{grammar, lexer, ErrorTolerantParser, ExtendedTerm}; +use crate::parser::{grammar, lexer, ErrorTolerantParserCompat, ExtendedTerm}; use crate::program::FieldPath; use crate::term::TraverseOrder; use crate::term::{record::Field, RichTerm, Term, Traverse}; @@ -190,14 +190,14 @@ impl ReplImpl { let (term, parse_errs) = self .parser - .parse_tolerant(file_id, lexer::Lexer::new(exp))?; + .parse_tolerant_compat(file_id, lexer::Lexer::new(exp))?; if !parse_errs.no_errors() { return Err(parse_errs.into()); } match term { - ExtendedTerm::RichTerm(t) => { + ExtendedTerm::Term(t) => { let t = self.prepare(None, t)?; Ok(eval_function( &mut self.vm, @@ -370,7 +370,7 @@ pub enum InitError { } pub enum InputStatus { - Complete(ExtendedTerm), + Complete(ExtendedTerm), Partial, Command, Failed(ParseErrors), @@ -415,7 +415,7 @@ impl InputParser { let result = self .parser - .parse_tolerant(self.file_id, lexer::Lexer::new(input)); + .parse_tolerant_compat(self.file_id, lexer::Lexer::new(input)); let partial = |pe| { matches!( diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index d8c0c27751..1d115b723a 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -22,6 +22,7 @@ use string::NickelString; use crate::{ cache::InputFormat, + 
combine::Combine, error::{EvalError, ParseError}, eval::{cache::CacheIndex, Environment}, files::FileId, @@ -908,6 +909,25 @@ impl TypeAnnotation { } } +impl Combine for TypeAnnotation { + fn combine(left: Self, right: Self) -> Self { + let (typ, leftover) = match (left.typ, right.typ) { + (left_ty @ Some(_), right_ty @ Some(_)) => (left_ty, right_ty), + (left_ty, right_ty) => (left_ty.or(right_ty), None), + }; + + let contracts: Vec<_> = left + .contracts + .iter() + .cloned() + .chain(leftover) + .chain(right.contracts.iter().cloned()) + .collect(); + + TypeAnnotation { typ, contracts } + } +} + impl From for LetMetadata { fn from(annotation: TypeAnnotation) -> Self { LetMetadata { @@ -962,11 +982,27 @@ pub enum StrChunk { ), } -#[cfg(test)] impl StrChunk { + #[cfg(test)] pub fn expr(e: E) -> Self { StrChunk::Expr(e, 0) } + + pub fn try_chunks_as_static_str<'a, I>(chunks: I) -> Option + where + I: IntoIterator>, + E: 'a, + { + chunks + .into_iter() + .try_fold(String::new(), |mut acc, next| match next { + StrChunk::Literal(lit) => { + acc.push_str(lit); + Some(acc) + } + _ => None, + }) + } } impl Term { @@ -1188,17 +1224,7 @@ impl Term { /// when the term is a `Term::StrChunk` and all the chunks are `StrChunk::Literal(..)` pub fn try_str_chunk_as_static_str(&self) -> Option { match self { - Term::StrChunks(chunks) => { - chunks - .iter() - .try_fold(String::new(), |mut acc, next| match next { - StrChunk::Literal(lit) => { - acc.push_str(lit); - Some(acc) - } - _ => None, - }) - } + Term::StrChunks(chunks) => StrChunk::try_chunks_as_static_str(chunks), _ => None, } } diff --git a/core/src/term/record.rs b/core/src/term/record.rs index c938ca3c08..beed017b4b 100644 --- a/core/src/term/record.rs +++ b/core/src/term/record.rs @@ -127,6 +127,34 @@ impl FieldMetadata { && !self.not_exported && matches!(self.priority, MergePriority::Neutral) } + + /// Set the `field_name` attribute of the labels of the type and contracts annotations. + pub fn with_field_name(mut self, name: Option) -> Self { + self.annotation = self.annotation.with_field_name(name); + self + } +} + +impl Combine for FieldMetadata { + fn combine(left: Self, right: Self) -> Self { + let priority = match (left.priority, right.priority) { + // Neutral corresponds to the case where no priority was specified. In that case, the + // other priority takes precedence. + (MergePriority::Neutral, p) | (p, MergePriority::Neutral) => p, + // Otherwise, we keep the maximum of both priorities, as we would do when merging + // values. + (p1, p2) => std::cmp::max(p1, p2), + }; + + FieldMetadata { + doc: crate::eval::merge::merge_doc(left.doc, right.doc), + annotation: Combine::combine(left.annotation, right.annotation), + opt: left.opt || right.opt, + // The resulting field will be suppressed from serialization if either of the fields to be merged is. 
+ not_exported: left.not_exported || right.not_exported, + priority, + } + } } impl From for FieldMetadata { @@ -208,16 +236,6 @@ impl Field { RecordExtKind::WithoutValue } } - - pub fn with_name(self, field_name: Option) -> Self { - Field { - metadata: FieldMetadata { - annotation: self.metadata.annotation.with_field_name(field_name), - ..self.metadata - }, - ..self - } - } } impl Traverse for Field { diff --git a/core/src/typ.rs b/core/src/typ.rs index f443f7f841..5b51175c62 100644 --- a/core/src/typ.rs +++ b/core/src/typ.rs @@ -1909,7 +1909,7 @@ impl PrettyPrintCap for Type {} #[cfg(test)] mod tests { use super::*; - use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParser}; + use crate::parser::{grammar::FixedTypeParser, lexer::Lexer, ErrorTolerantParserCompat}; /// Parse a type represented as a string. fn parse_type(s: &str) -> Type { @@ -1917,7 +1917,7 @@ mod tests { let id = Files::new().add("", s); FixedTypeParser::new() - .parse_strict(id, Lexer::new(s)) + .parse_strict_compat(id, Lexer::new(s)) .unwrap() } diff --git a/core/stdlib/internals.ncl b/core/stdlib/internals.ncl index bd74ab5483..9f2e3caf63 100644 --- a/core/stdlib/internals.ncl +++ b/core/stdlib/internals.ncl @@ -410,11 +410,6 @@ # `%contract/custom%`). "$naked_to_custom" = fun naked label value => 'Ok (naked label value), - # Recursive priorities operators - - "$rec_force" = fun value => %rec_force% (%force% value), - "$rec_default" = fun value => %rec_default% (%force% value), - # Provide access to std.contract.Equal within the initial environement. Merging # makes use of `std.contract.Equal`, but it can't blindly substitute such an # expression, because `contract` might have been redefined locally. Putting it diff --git a/core/tests/integration/typecheck_fail.rs b/core/tests/integration/typecheck_fail.rs index 816673a3e0..f829b47699 100644 --- a/core/tests/integration/typecheck_fail.rs +++ b/core/tests/integration/typecheck_fail.rs @@ -16,7 +16,7 @@ fn type_check_expr(s: impl std::string::ToString) -> Result<(), TypecheckError> let s = s.to_string(); let id = Files::new().add("", s.clone()); type_check( - &grammar::TermParser::new() + &grammar::ExprParser::new() .parse_strict(id, lexer::Lexer::new(&s)) .unwrap(), ) diff --git a/lsp/nls/src/analysis.rs b/lsp/nls/src/analysis.rs index 205c093116..61d96b6f61 100644 --- a/lsp/nls/src/analysis.rs +++ b/lsp/nls/src/analysis.rs @@ -438,7 +438,7 @@ mod tests { use nickel_lang_core::{ files::Files, identifier::Ident, - parser::{grammar, lexer, ErrorTolerantParser as _}, + parser::{grammar, lexer, ErrorTolerantParserCompat as _}, term::Term, }; @@ -480,7 +480,7 @@ mod tests { let file = Files::new().add("", s.to_owned()); let (rt, _errors) = grammar::TermParser::new() - .parse_tolerant(file, lexer::Lexer::new(s)) + .parse_tolerant_compat(file, lexer::Lexer::new(s)) .unwrap(); let parent = ParentLookup::new(&rt); diff --git a/lsp/nls/src/position.rs b/lsp/nls/src/position.rs index 09f44d5f23..477144476a 100644 --- a/lsp/nls/src/position.rs +++ b/lsp/nls/src/position.rs @@ -207,7 +207,7 @@ pub(crate) mod tests { use codespan::ByteIndex; use nickel_lang_core::{ files::{FileId, Files}, - parser::{grammar, lexer, ErrorTolerantParser}, + parser::{grammar, lexer, ErrorTolerantParserCompat}, term::{RichTerm, Term, UnaryOp}, }; @@ -217,7 +217,7 @@ pub(crate) mod tests { let id = Files::new().add("", String::from(s)); let term = grammar::TermParser::new() - .parse_strict(id, lexer::Lexer::new(s)) + .parse_strict_compat(id, lexer::Lexer::new(s)) .unwrap(); (id, 
term) } diff --git a/utils/src/test_program.rs b/utils/src/test_program.rs index 31a1136ba2..71556cf786 100644 --- a/utils/src/test_program.rs +++ b/utils/src/test_program.rs @@ -2,7 +2,7 @@ use nickel_lang_core::{ error::{Error, NullReporter, ParseError}, eval::cache::CacheImpl, files::Files, - parser::{grammar, lexer, ErrorTolerantParser, ExtendedTerm}, + parser::{grammar, lexer, ErrorTolerantParserCompat, ExtendedTerm}, program::Program, term::{RichTerm, Term}, typecheck::TypecheckMode, @@ -36,15 +36,15 @@ pub fn parse(s: &str) -> Result { let id = Files::new().add("", String::from(s)); grammar::TermParser::new() - .parse_strict(id, lexer::Lexer::new(s)) + .parse_strict_compat(id, lexer::Lexer::new(s)) .map_err(|errs| errs.errors.first().unwrap().clone()) } -pub fn parse_extended(s: &str) -> Result { +pub fn parse_extended(s: &str) -> Result, ParseError> { let id = Files::new().add("", String::from(s)); grammar::ExtendedTermParser::new() - .parse_strict(id, lexer::Lexer::new(s)) + .parse_strict_compat(id, lexer::Lexer::new(s)) .map_err(|errs| errs.errors.first().unwrap().clone()) }
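To close on the parser utilities: the `min_indent` helper documented in `core/src/parser/utils.rs` above computes the smallest indentation over the non-blank lines of a multi-line string, counting leading spaces and tabs and ignoring lines that are empty or all whitespace. The sketch below reimplements that rule on a plain `&str`, leaving out the chunk structure (interpolated expressions at the start of a line) that the real function also has to handle:

```rust
fn min_indent(s: &str) -> usize {
    s.lines()
        // Lines that are empty or consist only of whitespace are ignored.
        .filter(|line| !line.trim().is_empty())
        // Indentation is the run of leading spaces and tabs.
        .map(|line| line.chars().take_while(|c| *c == ' ' || *c == '\t').count())
        .min()
        .unwrap_or(0)
}

fn main() {
    let s = "\n    foo\n  bar\n\n      baz\n";
    // `bar` has the smallest indentation among the non-blank lines.
    assert_eq!(min_indent(s), 2);
}
```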