From ecd21cfae86f7cbc3d6db6bc91b7033e0bc5b897 Mon Sep 17 00:00:00 2001
From: Thibault Suzanne <tsuzanne@meta.com>
Date: Wed, 4 Oct 2023 11:34:46 -0700
Subject: [PATCH] Fix Pulse erlang comparisons between {atom, any} and any

Summary:
When comparing two values `x` and `y`, when `x` is an integer and `y` can have any type (usually an
unknown value, such as a function parameter), the Pulse erlang model splits cases on `y` being an
integer or not, and then uses an integer and incompartible comparison on the according cases.

Atoms didn't implement this, but instead of just being considered as unsupported, the code used to
consider that `(x : Atom)` and `(y: Any)` were incompatible. This leads to `x == y` being interpreted
as always `false`, which is neither sound (should be unconstrained) or complete (should be
bottom/unreachable) when `y` can be anything.

We fix this by precisely supporting `Atom` vs. `Any` comparisons the same way we do for integers.

We incidentally fix `Any` vs unsupported types (such as lists) comparisons by having them unsupported
instead of assuming the types are distinct. `Any` vs `Any` is also concerned by this incidental fix.

Remark: this does transform some false negative tests into false positive, which at first sight would
seem to be inconsistent with Pulse goals. This is because equality on unsupported types returns an
unconstrained value instead of stopping the execution, which could be considered as another
change (the reason for this being that stopping the execution would lead to too many false negatives
with the current any-type-splitting logic). Also note that the behaviour, although less precise, is
more consistent: tests that check if the result of an unsupported call is `true` or `false` should
behave the same, which wasn't the case before.

Reviewed By: rgrig

Differential Revision: D49831551

fbshipit-source-id: 67043bd897f62691b1fa1fe0ad029d004560e9c2
---
 infer/src/pulse/PulseModelsErlang.ml          | 71 ++++++++++++-------
 .../pulse/features/features_comparison.erl    | 16 ++---
 .../codetoanalyze/erlang/pulse/issues.exp     |  5 +-
 .../tests/codetoanalyze/erlang/topl/Makefile  |  2 +-
 .../erlang/topl/track-xid/Makefile            | 15 ++++
 .../erlang/topl/track-xid/issues.exp          |  3 +
 .../erlang/topl/track-xid/property.topl       |  4 ++
 .../erlang/topl/track-xid/topl_track_xid.erl  | 16 +++++
 8 files changed, 96 insertions(+), 36 deletions(-)
 create mode 100644 infer/tests/codetoanalyze/erlang/topl/track-xid/Makefile
 create mode 100644 infer/tests/codetoanalyze/erlang/topl/track-xid/issues.exp
 create mode 100644 infer/tests/codetoanalyze/erlang/topl/track-xid/property.topl
 create mode 100644 infer/tests/codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl

diff --git a/infer/src/pulse/PulseModelsErlang.ml b/infer/src/pulse/PulseModelsErlang.ml
index 6744ea55eea..0d0697f6b28 100644
--- a/infer/src/pulse/PulseModelsErlang.ml
+++ b/infer/src/pulse/PulseModelsErlang.ml
@@ -475,42 +475,46 @@ module Comparison = struct
 
     let le = ordering Le incompatible_lt
 
-    (** Compare two abstract values, when one of them might be an integer, by disjuncting on the
-        case whether it is (an integer) or not.
+    (** Compare two abstract values, when one of them has a known type and the other might be of the
+        same type, by disjuncting on the case whether the latter is the same type or not.
 
         Parameters (not in order):
 
-        - Two abstract values [x] and [y]. One of them is expected to ba a known integer, and the
+        - Two abstract values [x] and [y]. One of them is expected to have a known type, and the
           other one to have an undetermined dynamic type.
 
-        - The (erlang) types of [x] and [y] as determined by the caller. Corresponding to the
-          expectation mentioned above, [(ty_x, ty_y)] is expected to be either [(Integer, Any)] or
-          [(Any, Integer)] (this is not checked by the function and is the caller responsibility).
-
         - An [are_compatible] (boolean) abstract value that witnesses if the types of [x] and [y]
-          are both integers or if one of them is not (this is typically obtained by using
+          are both the same or if one of them is not (this is typically obtained by using
           {!has_erlang_type} on the Any-typed argument).
 
-        Returns: a disjunction built on top of [are_compatible], that compares [x] and [y] as
-        integers when they both are, and as incompatible values when the any-typed one is not an
-        integer. *)
-    let any_with_integer_split cmp location path ~are_compatible ty_x ty_y x y : disjunction_maker =
+        - A [cmp_compat] and a [cmp_incompat] function, each of them to be used according to the
+          corresponding value of [are_compatible]. Those are typically obtained as a monotyped
+          comparator function such as some [cmp.integer], or giving the types of [x] and [y] to some
+          [cmp.incompatible] comparator.
+
+        Returns: a disjunction built on top of [are_compatible], that compares [x] and [y] as same
+        types when they both are, and as incompatible values when the any-typed one is not same
+        typed. *)
+    let any_with_type_split cmp_compat cmp_incompat location path ~are_compatible x y :
+        disjunction_maker =
      fun astate ->
-      let int_result =
-        let** astate_int = PulseArithmetic.prune_positive are_compatible astate in
-        let++ astate_int, int_comparison = cmp.integer x y location path astate_int in
-        let int_hist = Hist.single_alloc path location "any_int_comparison" in
-        (astate_int, (int_comparison, int_hist))
+      let same_type_result =
+        let** astate_same_type = PulseArithmetic.prune_positive are_compatible astate in
+        let++ astate_same_type, same_type_comparison =
+          cmp_compat x y location path astate_same_type
+        in
+        let same_type_hist = Hist.single_alloc path location "any_same_type_comparison" in
+        (astate_same_type, (same_type_comparison, same_type_hist))
       in
       let incompatible_result =
         let** astate_incompatible = PulseArithmetic.prune_eq_zero are_compatible astate in
         let++ astate_incompatible, incompatible_comparison =
-          cmp.incompatible ty_x ty_y x y location path astate_incompatible
+          cmp_incompat x y location path astate_incompatible
         in
         let incompatible_hist = Hist.single_alloc path location "any_incompatible_comparison" in
         (astate_incompatible, (incompatible_comparison, incompatible_hist))
       in
-      SatUnsat.to_list int_result @ SatUnsat.to_list incompatible_result
+      SatUnsat.to_list same_type_result @ SatUnsat.to_list incompatible_result
   end
 
   (** Makes an abstract value holding the comparison result of two parameters. We perform a case
@@ -533,15 +537,19 @@ module Comparison = struct
 
       The final result is computed as follows:
 
-      - If the parameters are both of a supported type, integers, then we compare them accordingly
-        (eg. the [cmp.integer] function will then typically compare their value fields).
+      - If the parameters are both of a supported type, eg. integers, then we compare them
+        accordingly (eg. the [cmp.integer] function will then typically compare their value fields).
       - If the parameters have incompatible types, then we return the result of a comparison of
         incompatible types (eg. equality would be false, and inequality would be true).
       - If both parameters have the same unsupported type, then the comparison is unsupported and we
         use the [cmp.unsupported] function (that could for instance return an - overapproximating -
         unconstrained result).
-      - If at least one parameter has no known dynamic type (or, equivalently, its type is [Any]),
-        then the comparison is also unsupported.
+      - If one parameter has a supported type, say [Integer] and the other has no known dynamic type
+        (or, equivalently, its type is [Any]), then we case-split the unknown-typed value on being
+        an [Integer] or not and use the appropriate comparison on each case.
+      - If both parameters have no known dynamic type (or, equivalently, their type is [Any]), or
+        one has an unsupported type and the other one has [Any] type, then the comparison is also
+        unsupported.
 
       Note that, on supported types (eg. integers), it is important that the [cmp] functions decide
       themselves if they should compare some specific fields or not, instead of getting these fields
@@ -556,6 +564,7 @@ module Comparison = struct
     let x_typ = get_erlang_type_or_any x_val astate in
     let y_typ = get_erlang_type_or_any y_val astate in
     match (x_typ, y_typ) with
+    (* When supporting more types, this should probably be refactored to avoid N² cases. *)
     | Integer, Integer ->
         let<**> astate, result = cmp.integer x y location path astate in
         let hist = Hist.single_alloc path location "integer_comparison" in
@@ -566,11 +575,21 @@ module Comparison = struct
         [Ok (astate, (result, hist))]
     | Integer, Any ->
         let<**> astate, are_compatible = has_erlang_type y_val Integer astate in
-        Comparator.any_with_integer_split cmp location path ~are_compatible Integer Any x y astate
+        Comparator.any_with_type_split cmp.integer (cmp.incompatible Integer Any) location path
+          ~are_compatible x y astate
     | Any, Integer ->
         let<**> astate, are_compatible = has_erlang_type x_val Integer astate in
-        Comparator.any_with_integer_split cmp location path ~are_compatible Any Integer x y astate
-    | Nil, Nil | Cons, Cons | Tuple _, Tuple _ | Map, Map ->
+        Comparator.any_with_type_split cmp.integer (cmp.incompatible Any Integer) location path
+          ~are_compatible x y astate
+    | Atom, Any ->
+        let<**> astate, are_compatible = has_erlang_type y_val Atom astate in
+        Comparator.any_with_type_split cmp.atom (cmp.incompatible Atom Any) location path
+          ~are_compatible x y astate
+    | Any, Atom ->
+        let<**> astate, are_compatible = has_erlang_type x_val Atom astate in
+        Comparator.any_with_type_split cmp.atom (cmp.incompatible Any Atom) location path
+          ~are_compatible x y astate
+    | Nil, Nil | Cons, Cons | Tuple _, Tuple _ | Map, Map | Any, _ | _, Any ->
         let<**> astate, result = cmp.unsupported x y location path astate in
         let hist = Hist.single_alloc path location "unsupported_comparison" in
         [Ok (astate, (result, hist))]
diff --git a/infer/tests/codetoanalyze/erlang/pulse/features/features_comparison.erl b/infer/tests/codetoanalyze/erlang/pulse/features/features_comparison.erl
index 7e59a2196ad..866b902ca23 100644
--- a/infer/tests/codetoanalyze/erlang/pulse/features/features_comparison.erl
+++ b/infer/tests/codetoanalyze/erlang/pulse/features/features_comparison.erl
@@ -19,11 +19,11 @@
     test_equal_atom_Bad/0,
     test_equal_int_any_Latent/1,
     test_equal_int_any_2_Latent/1,
-    fn_test_equal_atom_any_Latent/1,
-    fp_test_equal_atom_any_2_Latent/1,
-    fn_test_equal_string_any_Latent/1,
+    test_equal_atom_any_Latent/1,
+    test_equal_atom_any_2_Latent/1,
+    fp_test_equal_string_any_Latent/1,
     fp_test_equal_string_any_2_Latent/1,
-    fn_test_equal_any_any_Latent/2,
+    fp_test_equal_any_any_Latent/2,
     fp_test_equal_any_any_2_Latent/2,
     test_neg_equal_atom_Ok/0,
     test_neg_equal_atom_Bad/0,
@@ -128,19 +128,19 @@ test_equal_int_any_Latent(X) ->
 test_equal_int_any_2_Latent(X) ->
     ?CRASH_IF_EQUAL(false, X == 42).
 
-fn_test_equal_atom_any_Latent(X) ->
+test_equal_atom_any_Latent(X) ->
     ?CRASH_IF_EQUAL(true, X == foo).
 
-fp_test_equal_atom_any_2_Latent(X) ->
+test_equal_atom_any_2_Latent(X) ->
     ?CRASH_IF_EQUAL(false, X == foo).
 
-fn_test_equal_string_any_Latent(X) ->
+fp_test_equal_string_any_Latent(X) ->
     ?CRASH_IF_EQUAL(true, X == "foo").
 
 fp_test_equal_string_any_2_Latent(X) ->
     ?CRASH_IF_EQUAL(false, X == "foo").
 
-fn_test_equal_any_any_Latent(X, Y) ->
+fp_test_equal_any_any_Latent(X, Y) ->
     ?CRASH_IF_EQUAL(true, X == Y).
 
 fp_test_equal_any_any_2_Latent(X, Y) ->
diff --git a/infer/tests/codetoanalyze/erlang/pulse/issues.exp b/infer/tests/codetoanalyze/erlang/pulse/issues.exp
index 41e7e4c7baf..fc964f8d1f2 100644
--- a/infer/tests/codetoanalyze/erlang/pulse/issues.exp
+++ b/infer/tests/codetoanalyze/erlang/pulse/issues.exp
@@ -44,8 +44,11 @@ codetoanalyze/erlang/pulse/features/features_comparison.erl, test_equal_int_atom
 codetoanalyze/erlang/pulse/features/features_comparison.erl, test_equal_atom_Bad/0, 3, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
 codetoanalyze/erlang/pulse/features/features_comparison.erl, test_equal_int_any_Latent/1, 1, NO_MATCH_OF_RHS_LATENT, no_bucket, ERROR, [no match of RHS here]
 codetoanalyze/erlang/pulse/features/features_comparison.erl, test_equal_int_any_2_Latent/1, 1, NO_MATCH_OF_RHS_LATENT, no_bucket, ERROR, [no match of RHS here]
-codetoanalyze/erlang/pulse/features/features_comparison.erl, fp_test_equal_atom_any_2_Latent/1, 1, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
+codetoanalyze/erlang/pulse/features/features_comparison.erl, test_equal_atom_any_Latent/1, 1, NO_MATCH_OF_RHS_LATENT, no_bucket, ERROR, [no match of RHS here]
+codetoanalyze/erlang/pulse/features/features_comparison.erl, test_equal_atom_any_2_Latent/1, 1, NO_MATCH_OF_RHS_LATENT, no_bucket, ERROR, [no match of RHS here]
+codetoanalyze/erlang/pulse/features/features_comparison.erl, fp_test_equal_string_any_Latent/1, 1, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
 codetoanalyze/erlang/pulse/features/features_comparison.erl, fp_test_equal_string_any_2_Latent/1, 1, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
+codetoanalyze/erlang/pulse/features/features_comparison.erl, fp_test_equal_any_any_Latent/2, 1, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
 codetoanalyze/erlang/pulse/features/features_comparison.erl, fp_test_equal_any_any_2_Latent/2, 1, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
 codetoanalyze/erlang/pulse/features/features_comparison.erl, test_neg_equal_atom_Bad/0, 3, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
 codetoanalyze/erlang/pulse/features/features_comparison.erl, test_exactly_equal_Bad/0, 3, NO_MATCH_OF_RHS, no_bucket, ERROR, [no match of RHS here]
diff --git a/infer/tests/codetoanalyze/erlang/topl/Makefile b/infer/tests/codetoanalyze/erlang/topl/Makefile
index 1d57b3ade1e..582d3728c85 100644
--- a/infer/tests/codetoanalyze/erlang/topl/Makefile
+++ b/infer/tests/codetoanalyze/erlang/topl/Makefile
@@ -4,7 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 # NOTE: If you wish to deactivate a Topl test, simply remove it from this list
-SUBDIRS = taint file process reach-fun reach-pair reach-simple less fields atom-literal atom-name str-literal
+SUBDIRS = taint file process reach-fun reach-pair reach-simple less fields atom-literal atom-name str-literal track-xid
 
 test-%:
 	$(MAKE) -C $* test
diff --git a/infer/tests/codetoanalyze/erlang/topl/track-xid/Makefile b/infer/tests/codetoanalyze/erlang/topl/track-xid/Makefile
new file mode 100644
index 00000000000..feeb57e487d
--- /dev/null
+++ b/infer/tests/codetoanalyze/erlang/topl/track-xid/Makefile
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+TESTS_DIR = ../../../..
+
+INFER_OPTIONS = --topl-only --topl-properties property.topl --enable-issue-type TOPL_ERROR_LATENT --project-root $(TESTS_DIR)
+INFERPRINT_OPTIONS = --issues-tests
+
+SOURCES = $(wildcard *.erl)
+
+include $(TESTS_DIR)/erlc.make
+
+infer-out/report.json: $(MAKEFILE_LIST)
diff --git a/infer/tests/codetoanalyze/erlang/topl/track-xid/issues.exp b/infer/tests/codetoanalyze/erlang/topl/track-xid/issues.exp
new file mode 100644
index 00000000000..8f8913acac6
--- /dev/null
+++ b/infer/tests/codetoanalyze/erlang/topl/track-xid/issues.exp
@@ -0,0 +1,3 @@
+codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl, direct/1, 0, TOPL_ERROR, no_bucket, ERROR, [call to src/2,call to tgt/2]
+codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl, tuple/1, 0, TOPL_ERROR_LATENT, no_bucket, ERROR, [call to src/2,call to tgt/2]
+codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl, map/1, 0, TOPL_ERROR_LATENT, no_bucket, ERROR, [call to src/2,call to tgt/2]
diff --git a/infer/tests/codetoanalyze/erlang/topl/track-xid/property.topl b/infer/tests/codetoanalyze/erlang/topl/track-xid/property.topl
new file mode 100644
index 00000000000..866a0b766f8
--- /dev/null
+++ b/infer/tests/codetoanalyze/erlang/topl/track-xid/property.topl
@@ -0,0 +1,4 @@
+property Xid
+  start->start: *
+  start->track: "b:src/2"(V, X, R) when X:Cons.strval == "xid" => r := V
+  track->error: "b:tgt/2"(V, X, R) when X:Cons.strval == "xid" && r == V
diff --git a/infer/tests/codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl b/infer/tests/codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl
new file mode 100644
index 00000000000..01266fbd8d2
--- /dev/null
+++ b/infer/tests/codetoanalyze/erlang/topl/track-xid/topl_track_xid.erl
@@ -0,0 +1,16 @@
+-module(topl_track_xid).
+-export([direct/1, tuple/1, map/1]).
+
+direct(X) ->
+  b:src(X, "xid"),
+  b:tgt(X, "xid").
+
+tuple(X) ->
+  {Y} = X,
+  b:src(Y, "xid"),
+  b:tgt(Y, "xid").
+
+map(X) ->
+  #{y:=Y} = X,
+  b:src(Y, "xid"),
+  b:tgt(Y, "xid").