From 64302a51f11c8559966213c2678e658b9f8e1dd3 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 27 Nov 2024 15:53:46 +0100 Subject: [PATCH] Clean the AST alloc interface A previous PR introduced generic `alloc` and `alloc_iter` methods, instead of needing a myriad of `xxx_move` functions for each and every AST component type. However, this poses a risk of memory leak because this method allocates in the generic bumpalo arena, which doesn't run destructors. This commit makes this approach safer by using a marker trait which forbids some AST components from being allocated through this method, when they need to be dropped. Anecdotally, `alloc_iter` is renamed to `alloc_many` and a specialized version `alloc_singleton` is added to eliminate a lot of `alloc.alloc_many(iter::once(x))`. In a second time, all the legacy `xxx_move` variants or the like are removed, and replaced with `alloc` and `alloc_many` whenever possible, making the interface of `AstAlloc` quite smaller. --- core/src/bytecode/ast/builder.rs | 26 +++--- core/src/bytecode/ast/compat.rs | 15 ++-- core/src/bytecode/ast/mod.rs | 133 +++++++++++-------------------- core/src/bytecode/ast/record.rs | 4 +- core/src/parser/grammar.lalrpop | 16 ++-- core/src/parser/uniterm.rs | 26 +++--- 6 files changed, 90 insertions(+), 130 deletions(-) diff --git a/core/src/bytecode/ast/builder.rs b/core/src/bytecode/ast/builder.rs index 521796906..52b7db846 100644 --- a/core/src/bytecode/ast/builder.rs +++ b/core/src/bytecode/ast/builder.rs @@ -187,7 +187,7 @@ impl<'ast> Field<'ast, Record<'ast>> { self.finalize_contracts(alloc); self.state.field_defs.push(record::FieldDef { - path: alloc.alloc_iter( + path: alloc.alloc_many( self.path .into_iter() .map(|id| FieldPathElem::Ident(id.into())), @@ -204,7 +204,7 @@ impl<'ast> Field<'ast, Record<'ast>> { self.finalize_contracts(alloc); self.state.field_defs.push(record::FieldDef { - path: alloc.alloc_iter( + path: alloc.alloc_many( self.path .into_iter() .map(|id| FieldPathElem::Ident(id.into())), @@ -221,7 +221,7 @@ impl<'ast> Field<'ast, Record<'ast>> { /// filling the field `self.metadata.annotation.contracts` with it. After this call, /// `self.contracts` is empty and shouldn't be used anymore (it will have no effect). fn finalize_contracts(&mut self, alloc: &'ast AstAlloc) { - self.metadata.annotation.contracts = alloc.types(self.contracts.drain(..)); + self.metadata.annotation.contracts = alloc.alloc_many(self.contracts.drain(..)); } } @@ -290,7 +290,7 @@ impl<'ast> Record<'ast> { pub fn build(self, alloc: &'ast AstAlloc) -> Ast<'ast> { alloc .record(record::Record { - field_defs: alloc.alloc_iter(self.field_defs), + field_defs: alloc.alloc_many(self.field_defs), open: self.open, }) .into() @@ -393,7 +393,7 @@ mod tests { { alloc .record(record::Record { - field_defs: alloc.alloc_iter(fields.into_iter().map(|(id, metadata, node)| { + field_defs: alloc.alloc_many(fields.into_iter().map(|(id, metadata, node)| { record::FieldDef { path: FieldPathElem::single_ident_path(alloc, id.into()), value: node.map(Ast::from), @@ -578,9 +578,9 @@ mod tests { ast, alloc .record(record::Record { - field_defs: alloc.alloc_iter(vec![ + field_defs: alloc.alloc_many(vec![ record::FieldDef { - path: alloc.alloc_iter(vec![ + path: alloc.alloc_many(vec![ FieldPathElem::Ident("terraform".into()), FieldPathElem::Ident("required_providers".into()) ]), @@ -588,7 +588,7 @@ mod tests { value: Some( alloc .record(record::Record { - field_defs: alloc.alloc_iter(vec![ + field_defs: alloc.alloc_many(vec![ record::FieldDef { path: FieldPathElem::single_ident_path( &alloc, @@ -615,7 +615,7 @@ mod tests { pos: TermPos::None, }, record::FieldDef { - path: alloc.alloc_iter(vec![ + path: alloc.alloc_many(vec![ FieldPathElem::Ident("terraform".into()), FieldPathElem::Ident("required_providers".into()), FieldPathElem::Ident("foo".into()) @@ -684,10 +684,10 @@ mod tests { iter::once(( "foo", record::FieldMetadata::from(Annotation { - contracts: alloc.types(std::iter::once(Type { + contracts: alloc.alloc_singleton(Type { typ: TypeF::String, pos: TermPos::None - })), + }), ..Default::default() }), None @@ -728,10 +728,10 @@ mod tests { typ: TypeF::Number, pos: TermPos::None, }), - contracts: alloc.types(iter::once(Type { + contracts: alloc.alloc_singleton(Type { typ: TypeF::String, pos: TermPos::None - })), + }), }, }, None, diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index f4361696f..b7e73ddb6 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -8,7 +8,6 @@ use super::{primop::PrimOp, *}; use crate::{combine::Combine, label, position::RawSpan, term, typ as mline_type}; use indexmap::IndexMap; use smallvec::SmallVec; -use std::iter; /// Convert from the mainline Nickel representation to the new AST representation. This trait is /// mostly `From` with an additional argument for the allocator. @@ -28,7 +27,7 @@ impl<'ast> FromMainline<'ast, term::pattern::Pattern> for &'ast Pattern<'ast> { alloc: &'ast AstAlloc, pattern: &term::pattern::Pattern, ) -> &'ast Pattern<'ast> { - alloc.pattern(pattern.to_ast(alloc)) + alloc.alloc(pattern.to_ast(alloc)) } } @@ -110,7 +109,7 @@ impl<'ast> FromMainline<'ast, term::pattern::ArrayPattern> for PatternData<'ast> impl<'ast> FromMainline<'ast, term::pattern::EnumPattern> for PatternData<'ast> { fn from_mainline(alloc: &'ast AstAlloc, enum_pat: &term::pattern::EnumPattern) -> Self { let pattern = enum_pat.pattern.as_ref().map(|pat| (**pat).to_ast(alloc)); - PatternData::Enum(alloc.enum_pattern(EnumPattern { + PatternData::Enum(alloc.alloc(EnumPattern { tag: enum_pat.tag, pattern, pos: enum_pat.pos, @@ -154,7 +153,7 @@ impl<'ast> FromMainline<'ast, term::TypeAnnotation> for Annotation<'ast> { fn from_mainline(alloc: &'ast AstAlloc, annot: &term::TypeAnnotation) -> Self { let typ = annot.typ.as_ref().map(|typ| typ.typ.to_ast(alloc)); - let contracts = alloc.types( + let contracts = alloc.alloc_many( annot .contracts .iter() @@ -177,7 +176,7 @@ impl<'ast> FromMainline<'ast, (LocIdent, term::record::Field)> for record::Field .unwrap_or(id.pos); record::FieldDef { - path: alloc.alloc_iter(iter::once(record::FieldPathElem::Ident(*id))), + path: alloc.alloc_singleton(record::FieldPathElem::Ident(*id)), value: content.value.as_ref().map(|term| term.to_ast(alloc)), metadata: content.metadata.to_ast(alloc), pos, @@ -370,12 +369,12 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> { })); alloc.record(Record { - field_defs: alloc.alloc_iter(field_defs), + field_defs: alloc.alloc_many(field_defs), open: data.attrs.open, }) } Term::Record(data) => { - let field_defs = alloc.alloc_iter(data.fields.iter().map(|(id, field)| { + let field_defs = alloc.alloc_many(data.fields.iter().map(|(id, field)| { let pos = field .value .as_ref() @@ -477,7 +476,7 @@ impl<'ast> FromMainline<'ast, term::RichTerm> for Ast<'ast> { impl<'ast> FromMainline<'ast, term::RichTerm> for &'ast Ast<'ast> { fn from_mainline(alloc: &'ast AstAlloc, rterm: &term::RichTerm) -> Self { - alloc.ast(rterm.to_ast(alloc)) + alloc.alloc(rterm.to_ast(alloc)) } } diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs index 16b38f73a..2f6d5150a 100644 --- a/core/src/bytecode/ast/mod.rs +++ b/core/src/bytecode/ast/mod.rs @@ -306,6 +306,32 @@ pub enum Import<'ast> { Package { id: Ident }, } +/// Marker trait for AST nodes that don't need to be dropped (in practice, it's often equivalent to +/// not owning any heap allocated data) and can be used with [allocator][AstAlloc::alloc]. The +/// current exceptions are [Number] and [crate::error::ParseError], which must be allocated through +/// specialized method in [AstAlloc]. +pub trait Allocable {} + +impl Allocable for Ast<'_> {} +impl Allocable for StringChunk {} +impl Allocable for LetBinding<'_> {} +impl Allocable for PrimOp {} +impl Allocable for Annotation<'_> {} + +impl Allocable for record::FieldPathElem<'_> {} +impl Allocable for FieldDef<'_> {} + +impl Allocable for Pattern<'_> {} +impl Allocable for EnumPattern<'_> {} +impl Allocable for FieldPattern<'_> {} +impl Allocable for RecordPattern<'_> {} +impl Allocable for ArrayPattern<'_> {} +impl Allocable for OrPattern<'_> {} +impl Allocable for ConstantPattern<'_> {} + +impl Allocable for Type<'_> {} +impl Allocable for typ::RecordRows<'_> {} +impl Allocable for typ::EnumRows<'_> {} /// Owns the arenas required to allocate new AST nodes and provide builder methods to create them. /// /// # Drop and arena allocation @@ -326,7 +352,7 @@ pub struct AstAlloc { } impl AstAlloc { - /// Create a new ast allocator. + /// Creates a new ast allocator. pub fn new() -> Self { Self { generic_arena: Bump::new(), @@ -335,21 +361,21 @@ impl AstAlloc { } } - /// Allocate an AST element in the arena. + /// Allocates an AST component in the arena. /// /// [Self] never guarantees that all destructors are going to be run when using such a generic /// allocation function. We don't want to allocate values that need to be dropped through this /// method, typically because they own heap-allocated data, such as numbers or parse errors. /// That's why we use a marker trait to specify which types can be allocated freely. Types that - /// need to be dropped have a dedicated method for allocation. - pub fn alloc(&self, value: T) -> &T { + /// need to be dropped don't implement [Allocable] and have a dedicated method for allocation. + pub fn alloc(&self, value: T) -> &T { self.generic_arena.alloc(value) } - /// Allocate a sequence of AST elements in the arena. + /// Allocates a sequence of AST components in the arena. /// /// See [Self::alloc]. - pub fn alloc_iter(&self, iter: I) -> &[T] + pub fn alloc_many(&self, iter: I) -> &[T] where I: IntoIterator, I::IntoIter: ExactSizeIterator, @@ -357,19 +383,22 @@ impl AstAlloc { self.generic_arena.alloc_slice_fill_iter(iter) } - pub fn alloc_str<'ast>(&'ast self, s: &str) -> &'ast str { - self.generic_arena.alloc_str(s) + /// Allocates an array with exactly one element in the arena. + pub fn alloc_singleton(&self, value: T) -> &[T] { + self.generic_arena + .alloc_slice_fill_iter(std::iter::once(value)) } - pub fn node<'ast>(&'ast self, node: Node<'ast>) -> &'ast Node<'ast> { - self.generic_arena.alloc(node) + /// Allocates a string in the arena. + pub fn alloc_str<'ast>(&'ast self, s: &str) -> &'ast str { + self.generic_arena.alloc_str(s) } pub fn number(&self, number: Number) -> Node<'_> { Node::Number(self.number_arena.alloc(number)) } - pub fn number_move(&self, number: Number) -> &'_ Number { + pub fn alloc_number(&self, number: Number) -> &'_ Number { self.number_arena.alloc(number) } @@ -532,91 +561,22 @@ impl AstAlloc { Node::Type(self.generic_arena.alloc(typ)) } - pub fn type_from_unr<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> Node<'ast> { - Node::Type(self.type_move(Type { typ, pos })) - } - pub fn type_data<'ast>(&'ast self, typ: TypeUnr<'ast>, pos: TermPos) -> &'ast Type<'ast> { - self.type_move(Type { typ, pos }) - } - - pub fn type_move<'ast>(&'ast self, typ: Type<'ast>) -> &'ast Type<'ast> { - self.generic_arena.alloc(typ) - } - - pub fn types<'ast, I>(&'ast self, types: I) -> &'ast [Type<'ast>] - where - I: IntoIterator>, - I::IntoIter: ExactSizeIterator, - { - self.generic_arena.alloc_slice_fill_iter(types) + self.generic_arena.alloc(Type { typ, pos }) } pub fn enum_rows<'ast>(&'ast self, erows: EnumRowsUnr<'ast>) -> &'ast EnumRows<'ast> { self.generic_arena.alloc(EnumRows(erows)) } - pub fn enum_rows_move<'ast>(&'ast self, erows: EnumRows<'ast>) -> &'ast EnumRows<'ast> { - self.generic_arena.alloc(erows) - } - pub fn record_rows<'ast>(&'ast self, rrows: RecordRowsUnr<'ast>) -> &'ast RecordRows<'ast> { self.generic_arena.alloc(RecordRows(rrows)) } - pub fn record_rows_move<'ast>(&'ast self, rrows: RecordRows<'ast>) -> &'ast RecordRows<'ast> { - self.generic_arena.alloc(rrows) - } - - pub fn record_row<'ast>(&'ast self, id: LocIdent, typ: Type<'ast>) -> &'ast RecordRow<'ast> { - self.generic_arena.alloc(RecordRow { - id, - typ: self.generic_arena.alloc(typ), - }) - } - pub fn parse_error(&self, error: ParseError) -> Node<'_> { Node::ParseError(self.error_arena.alloc(error)) } - pub fn ast<'ast>(&'ast self, ast: Ast<'ast>) -> &'ast Ast<'ast> { - self.generic_arena.alloc(ast) - } - - pub fn pattern<'ast>(&'ast self, pattern: Pattern<'ast>) -> &'ast Pattern<'ast> { - self.generic_arena.alloc(pattern) - } - - pub fn patterns<'ast, I>(&'ast self, patterns: I) -> &'ast [Pattern<'ast>] - where - I: IntoIterator>, - I::IntoIter: ExactSizeIterator, - { - self.generic_arena.alloc_slice_fill_iter(patterns) - } - - pub fn enum_pattern<'ast>( - &'ast self, - enum_pattern: EnumPattern<'ast>, - ) -> &'ast EnumPattern<'ast> { - self.generic_arena.alloc(enum_pattern) - } - - pub fn field_pattern<'ast>( - &'ast self, - field_pat: FieldPattern<'ast>, - ) -> &'ast FieldPattern<'ast> { - self.generic_arena.alloc(field_pat) - } - - pub fn field_patterns<'ast, I>(&'ast self, field_pats: I) -> &'ast [FieldPattern<'ast>] - where - I: IntoIterator>, - I::IntoIter: ExactSizeIterator, - { - self.generic_arena.alloc_slice_fill_iter(field_pats) - } - pub fn record_pattern<'ast, I>( &'ast self, patterns: I, @@ -628,7 +588,7 @@ impl AstAlloc { I::IntoIter: ExactSizeIterator, { self.generic_arena.alloc(RecordPattern { - patterns: self.field_patterns(patterns), + patterns: self.generic_arena.alloc_slice_fill_iter(patterns), tail, pos, }) @@ -645,7 +605,7 @@ impl AstAlloc { I::IntoIter: ExactSizeIterator, { self.generic_arena.alloc(ArrayPattern { - patterns: self.patterns(patterns), + patterns: self.generic_arena.alloc_slice_fill_iter(patterns), tail, pos, }) @@ -656,9 +616,10 @@ impl AstAlloc { I: IntoIterator>, I::IntoIter: ExactSizeIterator, { - let patterns = self.generic_arena.alloc_slice_fill_iter(patterns); - - self.generic_arena.alloc(OrPattern { patterns, pos }) + self.generic_arena.alloc(OrPattern { + patterns: self.generic_arena.alloc_slice_fill_iter(patterns), + pos, + }) } } diff --git a/core/src/bytecode/ast/record.rs b/core/src/bytecode/ast/record.rs index 4b8118a2a..6c1e852a7 100644 --- a/core/src/bytecode/ast/record.rs +++ b/core/src/bytecode/ast/record.rs @@ -35,12 +35,12 @@ impl<'ast> FieldPathElem<'ast> { alloc: &'ast AstAlloc, ident: LocIdent, ) -> &'ast [FieldPathElem<'ast>] { - alloc.alloc_iter(std::iter::once(FieldPathElem::Ident(ident))) + alloc.alloc_singleton(FieldPathElem::Ident(ident)) } /// Crate a path composed of a single dynamic expression. pub fn single_expr_path(alloc: &'ast AstAlloc, expr: Ast<'ast>) -> &'ast [FieldPathElem<'ast>] { - alloc.alloc_iter(std::iter::once(FieldPathElem::Expr(expr))) + alloc.alloc_singleton(FieldPathElem::Expr(expr)) } /// Try to interpret this element element as a static identifier. Returns `None` if the the diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index c093b25a7..19b656b32 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -147,7 +147,7 @@ AnnotSeries: AnnotAtom = => { // `FixedType` (see `FixedType` and `parser::utils::fix_type_vars`). AnnotAtom: Annotation<'ast> = { "|" => Annotation { - contracts: alloc.types(iter::once(<>)), + contracts: alloc.alloc_singleton(<>), ..Default::default() }, ":" => Annotation { @@ -467,7 +467,7 @@ RecordField: FieldDef<'ast> = {