From 751da650bea2a58d5211dd81c1c6a24d74e7bf84 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 30 Oct 2024 12:56:36 +0100 Subject: [PATCH] WIP - FixTypeVars, part 1 --- core/src/parser/uniterm.rs | 315 ++++++++++++++++++++++++++----------- 1 file changed, 227 insertions(+), 88 deletions(-) diff --git a/core/src/parser/uniterm.rs b/core/src/parser/uniterm.rs index 47ba2e273d..05f47abf4e 100644 --- a/core/src/parser/uniterm.rs +++ b/core/src/parser/uniterm.rs @@ -8,7 +8,7 @@ use crate::{ bytecode::ast::{ self, record::{Field, FieldMetadata}, - typ::{EnumRows, RecordRow, RecordRows, Type}, + typ::{EnumRow, EnumRows, RecordRow, RecordRows, Type}, Annotation, Ast, AstAlloc, MergePriority, Node, }, environment::Environment, @@ -136,8 +136,8 @@ impl<'ast> TryFromUni<'ast, UniTerm<'ast>> for Ast<'ast> { let node = match node { UniTermNode::Var(id) => Node::Var(id), UniTermNode::Record(r) => Ast::try_from_uni(alloc, r)?.node, - UniTermNode::Type(mut typ) => { - typ.fix_type_vars(pos.unwrap())?; + UniTermNode::Type(typ) => { + let typ = typ.fix_type_vars(alloc, pos.unwrap())?; if let TypeF::Contract(ctr) = typ.typ { ctr.node @@ -247,11 +247,7 @@ impl<'ast> UniRecord<'ast> { value: None, metadata: FieldMetadata { - annotation: - Annotation { - typ: Some(typ), - .. - }, + annotation: Annotation { typ: Some(typ), .. }, .. }, .. 
@@ -370,7 +366,10 @@ impl<'ast> UniRecord<'ast> { priority: MergePriority::Neutral, }, } if contracts.is_empty() => Ok(RecordRows(RecordRowsF::Extend { - row: RecordRow { id, typ: alloc.type_data(typ.typ, typ.pos) }, + row: RecordRow { + id, + typ: alloc.type_data(typ.typ, typ.pos), + }, tail: alloc.record_rows(tail.0), })), _ => { @@ -478,15 +477,15 @@ impl<'ast> TryFromUni<'ast, UniRecord<'ast>> for Ast<'ast> { let result = if ur.tail.is_some() || (ur.is_record_type() && !ur.fields.is_empty()) { let tail_span = ur.tail.as_ref().and_then(|t| t.1.into_opt()); // We unwrap all positions: at this stage of the parsing, they must all be set - let mut typ = ur - .into_type_strict(alloc) - .map_err(|cause| ParseError::InvalidRecordType { - tail_span, - record_span: pos.unwrap(), - cause, - })?; - - typ.fix_type_vars(pos.unwrap())?; + let mut typ = + ur.into_type_strict(alloc) + .map_err(|cause| ParseError::InvalidRecordType { + tail_span, + record_span: pos.unwrap(), + cause, + })?; + + typ.fix_type_vars(alloc, pos.unwrap())?; Ok(Ast { node: alloc.typ(typ), pos, @@ -612,10 +611,46 @@ impl VarKindCell { } } -pub(super) trait FixTypeVars { +#[derive(Clone, Debug)] +pub(super) enum FixResult { + Fixed(T), + Unchanged(T), +} + +impl FixResult { + pub fn zip(self, other: FixResult) -> FixResult<(T, U)> { + match (self, other) { + (FixResult::Fixed(t), FixResult::Fixed(u)) + | (FixResult::Unchanged(t), FixResult::Fixed(u)) + | (FixResult::Fixed(t), FixResult::Unchanged(u)) => FixResult::Fixed((t, u)), + (FixResult::Unchanged(t), FixResult::Unchanged(u)) => FixResult::Unchanged((t, u)), + } + } + + pub fn map_if_fixed(self, f: F, this: U) -> FixResult + where + F: FnOnce(T) -> U, + { + match self { + FixResult::Fixed(t) => FixResult::Fixed(f(t)), + FixResult::Unchanged(_) => FixResult::Unchanged(this), + } + } + + pub fn extract(self) -> T { + match self { + FixResult::Fixed(t) | FixResult::Unchanged(t) => t, + } + } +} + +pub(super) trait FixTypeVars<'ast> +where + Self: 
Sized, +{ /// Post-process a type at the right hand side of an annotation by replacing each unbound type /// variable `TypeF::Var(id)` by a term variable with the same identifier seen as a custom - /// contract `TypeF::Contract(Term::Var(id))`. + /// contract `TypeF::Contract(Node::Var(id))`. /// /// Additionally, this passes determine the kind of a variable introduced by a forall binder. /// @@ -625,8 +660,8 @@ pub(super) trait FixTypeVars { /// variables occurring in types, we often can't know right away if such a variable occurrence /// will eventually be a type variable or a term variable seen as a custom contract. /// - /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to a - /// contract (e.g. in `x | a -> b`) or a type variable (e.g. in `x | forall a b. a -> b`), + /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to + /// a contract (e.g. in `x | a -> b`) or type variables (e.g. in `x | forall a b. a -> b`), /// depending on enclosing `forall`s. To handle both cases, we initially parse all variables /// inside types as type variables. When reaching the right-hand side of an annotation, because /// `forall`s can only bind locally in a type, we can then decide the actual nature of each @@ -634,9 +669,10 @@ pub(super) trait FixTypeVars { /// that are not actually bound by a `forall` to be term variables. This is the role of /// `fix_type_vars()`. /// - /// Once again because `forall`s only bind variables locally, and don't bind inside contracts, - /// we don't have to recurse into contracts and this pass will only visit each node of the AST - /// at most once in total (and most probably much less so). + /// Since `forall`s only bind type variables locally and don't cross contract boundaries, we don't + /// have to recurse into contracts and this pass will only visit each node of the AST at most + /// once in total (and most probably much less so).
In some sense, we just visit the type + /// layer, or type spine, composed only of type constructors. /// /// There is one subtlety with unirecords, though. A unirecord can still be in interpreted as a /// record type later. Take the following example: @@ -671,26 +707,46 @@ pub(super) trait FixTypeVars { /// # this is inconsistent and will raise a parse error /// forall a. [| 'foo, 'bar; a |] -> {foo : Str, bar: Str; a} /// ``` - fn fix_type_vars(&mut self, span: RawSpan) -> Result<(), ParseError> { - self.fix_type_vars_env(BoundVarEnv::new(), span) + fn fix_type_vars(self, alloc: &'ast AstAlloc, span: RawSpan) -> Result { + self.fix_type_vars_env(alloc, BoundVarEnv::new(), span) + .map(FixResult::extract) } /// Fix type vars in a given environment of variables bound by foralls enclosing this type. The /// environment maps bound variables to a reference to the variable kind of the corresponding /// forall. fn fix_type_vars_env( - &mut self, + self, + alloc: &'ast AstAlloc, + bound_vars: BoundVarEnv, + span: RawSpan, + ) -> Result, ParseError>; +} + +impl<'ast> FixTypeVars<'ast> for &'ast Type<'ast> { + fn fix_type_vars_env( + self, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, - ) -> Result<(), ParseError>; + ) -> Result, ParseError> { + todo!() + } } -impl<'ast> FixTypeVars for Type<'ast> { +impl<'ast> FixTypeVars<'ast> for Type<'ast> { fn fix_type_vars_env( - &mut self, + self, + alloc: &'ast AstAlloc, mut bound_vars: BoundVarEnv, span: RawSpan, - ) -> Result<(), ParseError> { + ) -> Result, ParseError> { + use crate::bytecode::ast::typ::TypeUnr; + + let pos = self.pos; + + let build_fixed = |new_type: TypeUnr<'ast>| -> Self { Type { typ: new_type, pos } }; + match self.typ { TypeF::Dyn | TypeF::Number @@ -704,11 +760,13 @@ impl<'ast> FixTypeVars for Type<'ast> { // particular mustn't be allowed to capture type variables from the enclosing type: see // https://github.com/tweag/nickel/issues/1228. 
| TypeF::Dict { flavour: DictTypeFlavour::Contract, ..} - | TypeF::Wildcard(_) => Ok(()), - TypeF::Arrow(ref mut s, ref mut t) => { - (*s).fix_type_vars_env(bound_vars.clone(), span)?; - (*t).fix_type_vars_env(bound_vars, span)?; - Ok(()) + | TypeF::Wildcard(_) => Ok(FixResult::Unchanged(self)), + TypeF::Arrow(src, tgt) => { + let result = src.fix_type_vars_env(alloc, bound_vars.clone(), span)?.zip( + tgt.fix_type_vars_env(alloc, bound_vars, span)? + ); + + Ok(result.map_if_fixed(|(src, tgt)| build_fixed(TypeF::Arrow(src, tgt)), self)) } TypeF::Var(sym) => { if let Some(cell) = bound_vars.get(&sym) { @@ -717,96 +775,177 @@ impl<'ast> FixTypeVars for Type<'ast> { ty_var: LocIdent::from(sym).with_pos(self.pos), span })?; + + Ok(FixResult::Unchanged(self)) } else { let id = LocIdent::from(sym).with_pos(self.pos); - self.typ = TypeF::Contract(RichTerm::new(Term::Var(id), id.pos)); + + Ok(FixResult::Fixed(build_fixed( + TypeF::Contract(alloc.ast(Ast { + node: Node::Var(id), + pos: id.pos}))))) } - Ok(()) } TypeF::Forall { - ref var, - ref mut var_kind, - ref mut body, + var, + var_kind, + body, } => { // We span a new VarKindCell and put it in the environment. The recursive calls to // fix_type_vars will fill this cell with the correct kind, which we get afterwards // to set the right value for `var_kind`. bound_vars.insert(var.ident(), VarKindCell::new()); -// let x : forall a. { _foo: forall a. a, bar: { ; a } } - (*body).fix_type_vars_env(bound_vars.clone(), span)?; - // unwrap(): We just inserted a value for `var` above, and environment can never + let body = body.fix_type_vars_env(alloc, bound_vars.clone(), span)?; + // unwrap(): we just inserted a value for `var` above, and environment can never // delete values. - // take_var_kind(): Once we leave the body of this forall, we no longer need + // take_var_kind(): once we leave the body of this forall, we no longer need // access to this VarKindCell in bound_vars. We can avoid a clone by taking // the var_kind out. 
We could also take the whole key value pair out of the // `Environment`, but ownership there is trickier. - *var_kind = bound_vars + let var_kind = bound_vars .get(&var.ident()) .unwrap() .take_var_kind() .unwrap_or_default(); - Ok(()) + Ok(body.map_if_fixed(|body| build_fixed(TypeF::Forall { + var, + var_kind, + body, + }), self)) } TypeF::Dict { - type_fields: ref mut ty, - flavour: DictTypeFlavour::Type - } | TypeF::Array(ref mut ty) => { - (*ty).fix_type_vars_env(bound_vars, span) + type_fields: ty, + flavour: flavour @ DictTypeFlavour::Type + } => { + Ok(ty.fix_type_vars_env(alloc, bound_vars, span)?.map_if_fixed(|type_fields| build_fixed(TypeF::Dict { + type_fields, + flavour, + }), self)) } - TypeF::Enum(ref mut erows) => erows.fix_type_vars_env(bound_vars, span), - TypeF::Record(ref mut rrows) => rrows.fix_type_vars_env(bound_vars, span), + TypeF::Array(ty) => { + todo!() + } + TypeF::Enum(erows) => Ok(erows.fix_type_vars_env(alloc, bound_vars, span)?.map_if_fixed(|erows| build_fixed(TypeF::Enum(erows)), self)), + TypeF::Record(rrows) => Ok(rrows.fix_type_vars_env(alloc, bound_vars, span)?.map_if_fixed(|rrows| build_fixed(TypeF::Record(rrows)), self)), } } } -impl<'ast> FixTypeVars for RecordRows<'ast> { +fn do_fix<'ast>( + rrows: &'ast RecordRows, + alloc: &'ast AstAlloc, + bound_vars: BoundVarEnv, + span: RawSpan, + mut maybe_excluded: HashSet, +) -> Result>, ParseError> { + match &rrows.0 { + RecordRowsF::Empty | RecordRowsF::TailDyn => Ok(FixResult::Unchanged(rrows)), + // We can't have a contract in tail position, so we don't fix `TailVar`. However, we + // have to set the correct kind for the corresponding forall binder. 
+ RecordRowsF::TailVar(ref id) => { + if let Some(cell) = bound_vars.get(&id.ident()) { + cell.try_set(VarKind::RecordRows { + excluded: maybe_excluded, + }) + .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?; + } + + Ok(FixResult::Unchanged(rrows)) + } + RecordRowsF::Extend { row, tail } => { + maybe_excluded.insert(row.id.ident()); + + Ok(row + + .fix_type_vars_env(alloc, bound_vars.clone(), span)? + .zip(do_fix(tail, alloc, bound_vars, span, maybe_excluded)?) + .map_if_fixed( + |(row, tail)| alloc.record_rows(RecordRowsF::Extend { row, tail }), + rrows, + )) + } + } +} + +impl<'ast> FixTypeVars<'ast> for RecordRows<'ast> { fn fix_type_vars_env( - &mut self, + self, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, - ) -> Result<(), ParseError> { - fn helper( - rrows: &mut RecordRows, - bound_vars: BoundVarEnv, - span: RawSpan, - mut maybe_excluded: HashSet, - ) -> Result<(), ParseError> { - match rrows.0 { - RecordRowsF::Empty => Ok(()), - RecordRowsF::TailDyn => Ok(()), - // We can't have a contract in tail position, so we don't fix `TailVar`. However, we - // have to set the correct kind for the corresponding forall binder. - RecordRowsF::TailVar(ref id) => { - if let Some(cell) = bound_vars.get(&id.ident()) { - cell.try_set(VarKind::RecordRows { - excluded: maybe_excluded, - }) - .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?; - } - Ok(()) - } - RecordRowsF::Extend { - ref mut row, - ref mut tail, - } => { - maybe_excluded.insert(row.id.ident()); - row.typ.fix_type_vars_env(bound_vars.clone(), span)?; - helper(tail, bound_vars, span, maybe_excluded) + ) -> Result, ParseError> { + let mut maybe_excluded = HashSet::new(); + + match self.0 { + RecordRowsF::Empty | RecordRowsF::TailDyn => Ok(FixResult::Unchanged(self)), + // We can't have a contract in tail position, so we don't fix `TailVar`. However, we + // have to set the correct kind for the corresponding forall binder. 
+ RecordRowsF::TailVar(ref id) => { + if let Some(cell) = bound_vars.get(&id.ident()) { + cell.try_set(VarKind::RecordRows { + excluded: maybe_excluded, + }) + .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: *id, span })?; } + + Ok(FixResult::Unchanged(self)) + } + RecordRowsF::Extend { row, tail } => { + maybe_excluded.insert(row.id.ident()); + + Ok(row + .fix_type_vars_env(alloc, bound_vars.clone(), span)? + .zip(do_fix(tail, alloc, bound_vars, span, maybe_excluded)?) + .map_if_fixed( + |(row, tail)| RecordRows(RecordRowsF::Extend { row, tail }), + self, + )) } } + } +} - helper(self, bound_vars, span, HashSet::new()) +impl<'ast> FixTypeVars<'ast> for &'ast RecordRows<'ast> { + fn fix_type_vars_env( + self, + alloc: &'ast AstAlloc, + bound_vars: BoundVarEnv, + span: RawSpan, + ) -> Result, ParseError> { + todo!() } } -impl FixTypeVars for EnumRows { +impl<'ast> FixTypeVars<'ast> for RecordRow<'ast> { fn fix_type_vars_env( - &mut self, + self, + alloc: &'ast AstAlloc, + bound_vars: BoundVarEnv, + span: RawSpan, + ) -> Result, ParseError> { + todo!() + } +} + +impl<'ast> FixTypeVars<'ast> for &'ast RecordRow<'ast> { + fn fix_type_vars_env( + self, + alloc: &'ast AstAlloc, + bound_vars: BoundVarEnv, + span: RawSpan, + ) -> Result, ParseError> { + todo!() + } +} + +impl<'ast> FixTypeVars<'ast> for EnumRows<'ast> { + fn fix_type_vars_env( + self, + alloc: &'ast AstAlloc, bound_vars: BoundVarEnv, span: RawSpan, - ) -> Result<(), ParseError> { + ) -> Result, ParseError> { fn do_fix( erows: &mut EnumRows, bound_vars: BoundVarEnv,