Improve types for null accesses and remove hacks (#6954)
When a struct.get or array.get is optimized to have a null reference
operand, its return type loses meaning since the operation will always
trap. Previously, when refinalizing such expressions, we just left their
return type unchanged since there was no longer an associated struct or
array type to calculate it from. However, this could lead to a strange
setup where the stale return type was the last remaining use of some
heap type in the module. That heap type would never be emitted in the
binary, but it was still used in the IR, so type optimizations would
have to keep updating it. Our type collection logic went out of its way
to include the return types of struct.get and array.get expressions to
account for this strange possibility, even though it otherwise collected
only types that would appear in binaries.
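
For a rough illustration (the types here are made up; the struct annotation
below is only needed to write standard text format, since in Binaryen IR the
accessed heap type is derived from the reference operand rather than stored on
the instruction):

```wat
(module
  (type $used (struct (field i32)))
  (type $pair (struct (field (ref null $used))))
  (func $example (result anyref)
    ;; The reference operand has been optimized down to a plain null, so this
    ;; access always traps. Its stale result type (ref null $used) can then be
    ;; the only remaining mention of $used in the IR, even though the type
    ;; never appears in the emitted binary.
    (struct.get $pair 0
      (ref.null none))))
```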

In principle, all of this should have applied to `call_ref` as well, but
the type collection logic did not have the necessary special case, so
there was probably a latent bug there.

Get rid of these special cases in the type collection logic and make it
impossible for the IR to use a stale type that no longer appears in the
binary by updating such stale types during finalization. One possibility
would have been to make the return types of null accessors unreachable,
but this violates the usual invariant that unreachable instructions must
either have unreachable children or be branches or `(unreachable)`.
Instead, refine the return types to be uninhabitable non-nullable
references to bottom, which is nearly as good as refining them directly
to unreachable.
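
Concretely, this is the refinement the updated local-subtyping test below
checks for; a sketch of the kind of input involved (names and exact shape are
illustrative, not the actual test):

```wat
(module
  (type $ret-any (func (result anyref)))
  (func $example
    (local $f (ref null $ret-any))
    (local $x anyref)
    ;; $f only ever holds a null, so local-subtyping refines it to nullfuncref.
    (local.set $f
      (ref.null nofunc))
    ;; Refinalizing the call_ref then sees a definitely-null target: rather
    ;; than becoming unreachable, its result type is refined to the
    ;; uninhabitable (ref none), and $x can be refined to (ref none) in a
    ;; later iteration.
    (local.set $x
      (call_ref $ret-any
        (local.get $f)))))
```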

We can consider refining them to `unreachable` in the future, but
another problem with that approach is that it would currently allow the
parsers to admit more invalid modules, with arbitrary junk after null
accessor instructions.
tlively authored Sep 18, 2024
1 parent a99b48a commit b381a8c
Showing 3 changed files with 32 additions and 23 deletions.
19 changes: 0 additions & 19 deletions src/ir/module-utils.cpp
@@ -399,15 +399,10 @@ struct CodeScanner
       }
     } else if (auto* get = curr->dynCast<StructGet>()) {
       info.note(get->ref->type);
-      // TODO: Just include curr->type for AllTypes and UsedIRTypes to avoid
-      // this special case and to avoid emitting unnecessary types in binaries.
-      info.include(get->type);
     } else if (auto* set = curr->dynCast<StructSet>()) {
       info.note(set->ref->type);
     } else if (auto* get = curr->dynCast<ArrayGet>()) {
       info.note(get->ref->type);
-      // See above.
-      info.include(get->type);
     } else if (auto* set = curr->dynCast<ArraySet>()) {
       info.note(set->ref->type);
     } else if (auto* contBind = curr->dynCast<ContBind>()) {
@@ -468,20 +463,6 @@ InsertOrderedMap<HeapType, HeapTypeInfo> collectHeapTypeInfo(
     }
   }
 
-  // TODO: Remove this once we remove the hack for StructGet and StructSet in
-  // CodeScanner.
-  if (inclusion == TypeInclusion::UsedIRTypes) {
-    auto it = info.info.begin();
-    while (it != info.info.end()) {
-      if (it->second.useCount == 0) {
-        auto deleted = it++;
-        info.info.erase(deleted);
-      } else {
-        ++it;
-      }
-    }
-  }
-
   // Recursively traverse each reference type, which may have a child type that
   // is itself a reference type. This reflects an appearance in the binary
   // format that is in the type section itself. As we do this we may find more
34 changes: 31 additions & 3 deletions src/wasm/wasm.cpp
@@ -1008,7 +1008,25 @@ void CallRef::finalize() {
     return;
   }
   assert(target->type.isRef());
-  if (target->type.getHeapType().isBottom()) {
+  if (target->type.isNull()) {
+    // If this call_ref has been optimized to have a null reference, then it
+    // will definitely trap. We could update the type to be unreachable, but
+    // that would violate the invariant that non-branch instructions other than
+    // `unreachable` can only be unreachable if they have unreachable children.
+    // Make the result type as close to `unreachable` as possible without
+    // actually making it unreachable. TODO: consider just making this
+    // unreachable instead (and similar in other GC accessors), although this
+    // would currently cause the parser to admit more invalid modules.
+    if (type.isRef()) {
+      type = Type(type.getHeapType().getBottom(), NonNullable);
+    } else if (type.isTuple()) {
+      Tuple elems;
+      for (auto t : type) {
+        elems.push_back(
+          t.isRef() ? Type(t.getHeapType().getBottom(), NonNullable) : t);
+      }
+      type = Type(elems);
+    }
     return;
   }
   assert(target->type.getHeapType().isSignature());
@@ -1136,7 +1154,12 @@ void StructNew::finalize() {
 void StructGet::finalize() {
   if (ref->type == Type::unreachable) {
     type = Type::unreachable;
-  } else if (!ref->type.isNull()) {
+  } else if (ref->type.isNull()) {
+    // See comment on CallRef for explanation.
+    if (type.isRef()) {
+      type = Type(type.getHeapType().getBottom(), NonNullable);
+    }
+  } else {
     type = ref->type.getHeapType().getStruct().fields[index].type;
   }
 }
@@ -1180,7 +1203,12 @@ void ArrayNewFixed::finalize() {
 void ArrayGet::finalize() {
   if (ref->type == Type::unreachable || index->type == Type::unreachable) {
     type = Type::unreachable;
-  } else if (!ref->type.isNull()) {
+  } else if (ref->type.isNull()) {
+    // See comment on CallRef for explanation.
+    if (type.isRef()) {
+      type = Type(type.getHeapType().getBottom(), NonNullable);
+    }
+  } else {
     type = ref->type.getHeapType().getArray().element.type;
   }
 }
2 changes: 1 addition & 1 deletion test/lit/passes/local-subtyping.wast
@@ -274,7 +274,7 @@
 
 ;; CHECK: (func $multiple-iterations-refinalize-call-ref-bottom (type $0)
 ;; CHECK-NEXT: (local $f nullfuncref)
-;; CHECK-NEXT: (local $x anyref)
+;; CHECK-NEXT: (local $x (ref none))
 ;; CHECK-NEXT: (local.set $f
 ;; CHECK-NEXT: (ref.null nofunc)
 ;; CHECK-NEXT: )
