From e9955117d4b6fd49b026f88ed556754e6e28908c Mon Sep 17 00:00:00 2001 From: Jules Villard Date: Tue, 3 Dec 2024 05:40:51 -0800 Subject: [PATCH] [pulse] suppress mutual recursion reports when heap progress has been made Summary: This usually indicates the recursion is intended as we are traversing an inductive data structure. Could miss real bugs caused by cycles in the heap in other calling contexts. Reviewed By: skcho Differential Revision: D66667343 Privacy Context Container: L1208441 fbshipit-source-id: 64fd07d914168a5d6fa27734a249720e0057486a --- infer/src/pulse/PulseAbductiveDomain.ml | 28 ++++++++++++ infer/src/pulse/PulseAbductiveDomain.mli | 6 +++ infer/src/pulse/PulseCallOperations.ml | 26 +++++++++-- infer/src/pulse/PulseInterproc.ml | 19 ++++++-- infer/tests/codetoanalyze/c/pulse/issues.exp | 1 - .../tests/codetoanalyze/hack/pulse/issues.exp | 1 + .../codetoanalyze/hack/pulse/recursion.hack | 43 +++++++++++++++++++ 7 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 infer/tests/codetoanalyze/hack/pulse/recursion.hack diff --git a/infer/src/pulse/PulseAbductiveDomain.ml b/infer/src/pulse/PulseAbductiveDomain.ml index 08e2ba8eb47..86ed6c137d0 100644 --- a/infer/src/pulse/PulseAbductiveDomain.ml +++ b/infer/src/pulse/PulseAbductiveDomain.ml @@ -2332,6 +2332,34 @@ let reachable_addresses_from ?edge_filter addresses astate pre_or_post = |> CanonValue.downcast_set +let has_reachable_in_inner_pre_heap addresses astate = + (* We are looking for one "real" (i.e. in the program text somewhere) dereference from the stack + variables in the pre-condition. Since the first dereference is always to access the value of + the formal, it is enough to look for access paths with at least two dereferences. *) + let has_two_dereferences accesses = + let rec has_two_dereferences_aux has_deref (accesses : Access.t list) = + match accesses with + | [] -> + false + | Dereference :: accesses' -> + has_deref || has_two_dereferences_aux true accesses' + | _ :: accesses' -> + has_two_dereferences_aux has_deref accesses' + in + has_two_dereferences_aux false accesses + in + let addresses = + ListLabels.to_seq addresses |> Seq.map (CanonValue.canon' astate) |> CanonValue.Set.of_seq + in + GraphVisit.fold astate + ~var_filter:(fun _ -> true) + `Pre ~init:false ~finish:Fn.id + ~f:(fun _ found v accesses -> + if CanonValue.Set.mem v addresses && has_two_dereferences accesses then Stop true + else Continue found ) + |> snd + + module Stack = struct include SafeStack diff --git a/infer/src/pulse/PulseAbductiveDomain.mli b/infer/src/pulse/PulseAbductiveDomain.mli index 5b2f1a15095..28a28c0e6d8 100644 --- a/infer/src/pulse/PulseAbductiveDomain.mli +++ b/infer/src/pulse/PulseAbductiveDomain.mli @@ -353,6 +353,12 @@ val reachable_addresses_from : -> AbstractValue.Set.t (** Compute the set of abstract addresses that are reachable from given abstract addresses. *) +val has_reachable_in_inner_pre_heap : AbstractValue.t list -> t -> bool +(** [true] if there is a value in the provided list that is reachable from the pre-condition after + some non-trivial steps in the pre heap, i.e. the value is gotten from at least one dereference + from the parameters of the current procedure. Used to detect likely-harmless recursive calls + since heap progress has been made. *) + val get_unreachable_attributes : t -> AbstractValue.t list (** collect the addresses that have attributes but are unreachable in the current post-condition *) diff --git a/infer/src/pulse/PulseCallOperations.ml b/infer/src/pulse/PulseCallOperations.ml index 6bcf1a54a92..489ef3decb4 100644 --- a/infer/src/pulse/PulseCallOperations.ml +++ b/infer/src/pulse/PulseCallOperations.ml @@ -671,9 +671,29 @@ let on_recursive_call ({InterproceduralAnalysis.proc_desc} as analysis_data) cal L.d_printfln "Suppressing recursive call report for non-user-visible function %a" Procname.pp callee_pname | _ -> - PulseReport.report analysis_data ~is_suppressed:false ~latent:false - (MutualRecursionCycle - {cycle= PulseMutualRecursion.mk call_loc callee_pname; location= call_loc} ) ) ; + if + AbductiveDomain.has_reachable_in_inner_pre_heap + (List.map actuals ~f:(fun ((actual, _), _) -> actual)) + astate + then + L.d_printfln + "heap progress made before recursive call to %a; unlikely to be an infinite recursion, \ + suppressing report" + Procname.pp callee_pname + else + PulseReport.report analysis_data ~is_suppressed:false ~latent:false + (MutualRecursionCycle + {cycle= PulseMutualRecursion.mk call_loc callee_pname; location= call_loc} ) ) ; + astate ) + else if + AbductiveDomain.has_reachable_in_inner_pre_heap + (List.map actuals ~f:(fun ((actual, _), _) -> actual)) + astate + then ( + L.d_printfln + "heap progress made before recursive call to %a; unlikely to be an infinite recursion, not \ + recording the cycle" + Procname.pp callee_pname ; astate ) else AbductiveDomain.add_recursive_call call_loc callee_pname astate diff --git a/infer/src/pulse/PulseInterproc.ml b/infer/src/pulse/PulseInterproc.ml index d9f8289815e..0060e557064 100644 --- a/infer/src/pulse/PulseInterproc.ml +++ b/infer/src/pulse/PulseInterproc.ml @@ -964,7 +964,7 @@ let report_mutual_recursion_cycle let record_recursive_calls ({InterproceduralAnalysis.proc_desc} as analysis_data) callee_proc_name - call_loc callee_summary call_state = + call_loc actuals callee_summary call_state = if Procname.is_hack_xinit (Procdesc.get_proc_name proc_desc) then ( L.d_printfln "Not recording recursive calls for Hack xinit caller function %a" Procname.pp (Procdesc.get_proc_name proc_desc) ; @@ -975,6 +975,16 @@ let record_recursive_calls ({InterproceduralAnalysis.proc_desc} as analysis_data L.d_printfln "Not recording recursive calls for Hack xinit callee function %a" Procname.pp callee_proc_name ; call_state ) + else if + AbductiveDomain.has_reachable_in_inner_pre_heap + (List.map actuals ~f:(fun ((actual, _), _) -> actual)) + call_state.astate + then ( + L.d_printfln + "Not recording recursive calls for function %a since heap progress has been made to compute \ + actuals" + Procname.pp callee_proc_name ; + call_state ) else let callee_recursive_calls = PulseMutualRecursion.Set.filter_map @@ -1101,7 +1111,7 @@ let read_return_value {PathContext.timestamp} callee_proc_name call_loc ) ) -let apply_post analysis_data path callee_proc_name call_location callee_summary call_state = +let apply_post analysis_data path callee_proc_name call_location actuals callee_summary call_state = PerfEvent.(log (fun logger -> log_begin_event logger ~name:"pulse call post" ())) ; let r = call_state @@ -1110,7 +1120,7 @@ let apply_post analysis_data path callee_proc_name call_location callee_summary >>= apply_post_from_callee_post path callee_proc_name call_location callee_summary >>| add_attributes `Post path callee_proc_name call_location (AbductiveDomain.Summary.get_post callee_summary).attrs - >>| record_recursive_calls analysis_data callee_proc_name call_location callee_summary + >>| record_recursive_calls analysis_data callee_proc_name call_location actuals callee_summary >>| record_skipped_calls callee_proc_name call_location callee_summary >>| record_transitive_info analysis_data callee_proc_name call_location callee_summary >>| read_return_value path callee_proc_name call_location callee_summary @@ -1295,7 +1305,8 @@ let apply_summary analysis_data path ~callee_proc_name call_location ~callee_sum let call_state = {call_state with astate; visited= AddressSet.empty} in (* apply the postcondition *) let* call_state, return_caller = - apply_post analysis_data path callee_proc_name call_location callee_summary call_state + apply_post analysis_data path callee_proc_name call_location actuals callee_summary + call_state in let astate = if Topl.is_active () then diff --git a/infer/tests/codetoanalyze/c/pulse/issues.exp b/infer/tests/codetoanalyze/c/pulse/issues.exp index 7d6e2935cdd..22ef1ba7f94 100644 --- a/infer/tests/codetoanalyze/c/pulse/issues.exp +++ b/infer/tests/codetoanalyze/c/pulse/issues.exp @@ -108,7 +108,6 @@ codetoanalyze/c/pulse/latent.c, FN_crash_after_six_nodes_bad, 1, NULLPTR_DEREFER codetoanalyze/c/pulse/latent.c, FN_crash_after_six_nodes_bad, 1, NULLPTR_DEREFERENCE_LATENT, no_bucket, ERROR, [*** SUPPRESSED ***,source of the null value part of the trace starts here,is assigned to the null pointer,null pointer dereference part of the trace starts here,parameter `q` of FN_crash_after_six_nodes_bad,invalid access occurs here] codetoanalyze/c/pulse/latent.c, FN_crash_after_six_nodes_bad, 1, NULLPTR_DEREFERENCE_LATENT, no_bucket, ERROR, [*** SUPPRESSED ***,source of the null value part of the trace starts here,is assigned to the null pointer,null pointer dereference part of the trace starts here,parameter `q` of FN_crash_after_six_nodes_bad,invalid access occurs here] codetoanalyze/c/pulse/latent.c, FN_crash_after_six_nodes_bad, 1, NULLPTR_DEREFERENCE_LATENT, no_bucket, ERROR, [*** SUPPRESSED ***,source of the null value part of the trace starts here,is assigned to the null pointer,null pointer dereference part of the trace starts here,parameter `q` of FN_crash_after_six_nodes_bad,invalid access occurs here] -codetoanalyze/c/pulse/list_api.c, list_elem_free, 5, MUTUAL_RECURSION_CYCLE, no_bucket, WARNING, [`list_elem_free` makes a recursive call to `list_elem_free`] codetoanalyze/c/pulse/list_api.c, list_free, 1, NULLPTR_DEREFERENCE_LATENT, no_bucket, ERROR, [*** SUPPRESSED ***,source of the null value part of the trace starts here,is assigned to the null pointer,null pointer dereference part of the trace starts here,parameter `ptr` of list_free,invalid access occurs here] codetoanalyze/c/pulse/list_checks.c, null_ptr_deref_bad, 0, NULLPTR_DEREFERENCE, no_bucket, ERROR, [is assigned to the null pointer,when calling `go_to_next` here,parameter `head` of go_to_next,invalid access occurs here] codetoanalyze/c/pulse/list_checks.c, null_ptr_deref2_bad, 0, NULLPTR_DEREFERENCE, no_bucket, ERROR, [is assigned to the null pointer,when calling `go_to_end_of_list` here,parameter `head` of go_to_end_of_list,invalid access occurs here] diff --git a/infer/tests/codetoanalyze/hack/pulse/issues.exp b/infer/tests/codetoanalyze/hack/pulse/issues.exp index d5b3dc854ac..d6da2877154 100644 --- a/infer/tests/codetoanalyze/hack/pulse/issues.exp +++ b/infer/tests/codetoanalyze/hack/pulse/issues.exp @@ -215,6 +215,7 @@ propagators.hack, Propagators::Flows$static.propToSinkBad, 3, TAINT_ERROR, no_bu propagators.hack, Propagators::Flows$static.propWithSinkBad, 2, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value returned from `Propagators::Source$static.getTainted` with kind `Propagators`,when calling `Propagators::Prop$static.propWithSink` here,flows to this sink: value passed as argument `#0` to `Propagators::Sink$static.process` with kind `Propagators`], source: Propagators::Source$static.getTainted, sink: Propagators::Sink$static.process, tainted expression: Propagators::Prop$static.propWithSink() propagators.hack, Propagators::Flows$static.propWithSinkToSinkBad, 2, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value returned from `Propagators::Source$static.getTainted` with kind `Propagators`,when calling `Propagators::Prop$static.propWithSink` here,flows to this sink: value passed as argument `#0` to `Propagators::Sink$static.process` with kind `Propagators`], source: Propagators::Source$static.getTainted, sink: Propagators::Sink$static.process, tainted expression: Propagators::Prop$static.propWithSink() propagators.hack, Propagators::Flows$static.propWithSinkToSinkBad, 3, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value returned from `Propagators::Source$static.getTainted` with kind `Propagators`,flows to this sink: value passed as argument `#0` to `Propagators::Sink$static.process` with kind `Propagators`], source: Propagators::Source$static.getTainted, sink: Propagators::Sink$static.process, tainted expression: $t1 +recursion.hack, Node.infinite_bad, 1, MUTUAL_RECURSION_CYCLE, no_bucket, WARNING, [`Node.infinite_bad` makes a recursive call to `Node.infinite_bad`] return_type_matcher.hack, ReturnTypeMatcher::Flows$static.source1TaintFlowBad, 2, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value returned from `ReturnTypeMatcher::Source1$static.getTainted` with kind `ReturnTypeMatcher`,flows to this sink: value passed as argument `#0` to `ReturnTypeMatcher::Sink$static.process` with kind `ReturnTypeMatcher`], source: ReturnTypeMatcher::Source1$static.getTainted, sink: ReturnTypeMatcher::Sink$static.process, tainted expression: $source return_type_matcher.hack, ReturnTypeMatcher::Flows$static.source2TaintFlowBad, 2, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value returned from `ReturnTypeMatcher::Source2$static.getTainted` with kind `ReturnTypeMatcher`,flows to this sink: value passed as argument `#0` to `ReturnTypeMatcher::Sink$static.process` with kind `ReturnTypeMatcher`], source: ReturnTypeMatcher::Source2$static.getTainted, sink: ReturnTypeMatcher::Sink$static.process, tainted expression: $source sanitizers.hack, Sanitizers::Flows$static.taintedNotSanitizedBad, 2, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value returned from `Sanitizers::Source.getTainted` with kind `Sanitizers`,flows to this sink: value passed as argument `#0` to `Sanitizers::Sink$static.process` with kind `Sanitizers`], source: Sanitizers::Source.getTainted, sink: Sanitizers::Sink$static.process, tainted expression: $t diff --git a/infer/tests/codetoanalyze/hack/pulse/recursion.hack b/infer/tests/codetoanalyze/hack/pulse/recursion.hack new file mode 100644 index 00000000000..75f30d494bb --- /dev/null +++ b/infer/tests/codetoanalyze/hack/pulse/recursion.hack @@ -0,0 +1,43 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +class Node { + private Node $next; + private int $data; + + public function __construct(int $data, Node $next) { + $this->data = $data; + $this->next = $next; + } + + public function length_ok(): int { + if ($this->next == null) { + return 1; + } + return 1 + $this->next->length_ok(); + } + + // progress in callee + public function interproc_progress_ok(Node $list): void { + $this->interproc2_ok($list); + } + + private function interproc2_ok(Node $list): void { + $this->interproc_progress_ok($list->next); + } + + // progress before callee + public function other_interproc_progress_ok(Node $list): void { + $this->other_interproc2_ok($list->next); + } + + private function other_interproc2_ok(Node $list): void { + $this->other_interproc_progress_ok($list); + } + + public function infinite_bad(Node $something, Node $other): int { + return $this->infinite_bad($other, $something); + } +}