From 80ee243410f8708bfca45e2dd799d540bd4b10ed Mon Sep 17 00:00:00 2001 From: David Pichardie Date: Mon, 4 Nov 2024 06:58:06 -0800 Subject: [PATCH] [inferpython] level0 example Summary: we write a minimalist tainting challenge and solve it with new builtin models. For this example, we need to understand the module body and follow some closure resolutions to find the right source/sinks. Reviewed By: geralt-encore Differential Revision: D65323049 Privacy Context Container: L1208441 fbshipit-source-id: 413d75f746bbbf7d9e6e275bed420889bf17afad --- infer/src/pulse/PulseModelsDSL.ml | 23 ++++- infer/src/pulse/PulseModelsDSL.mli | 2 +- infer/src/pulse/PulseModelsHack.ml | 6 +- infer/src/pulse/PulseModelsPython.ml | 92 ++++++++++++++++++- infer/src/textual/TextualSil.ml | 2 + infer/src/textual/TextualSil.mli | 4 + .../codetoanalyze/python/pulse/.inferconfig | 12 +++ .../python/pulse/.inferconfig.bak | 17 ---- .../codetoanalyze/python/pulse/issues.exp | 1 + .../codetoanalyze/python/pulse/level0.py | 17 ++++ 10 files changed, 148 insertions(+), 28 deletions(-) create mode 100644 infer/tests/codetoanalyze/python/pulse/.inferconfig delete mode 100644 infer/tests/codetoanalyze/python/pulse/.inferconfig.bak create mode 100644 infer/tests/codetoanalyze/python/pulse/level0.py diff --git a/infer/src/pulse/PulseModelsDSL.ml b/infer/src/pulse/PulseModelsDSL.ml index 04441a7b13b..3d2fa39f7bd 100644 --- a/infer/src/pulse/PulseModelsDSL.ml +++ b/infer/src/pulse/PulseModelsDSL.ml @@ -799,12 +799,27 @@ module Syntax = struct |> exec_partial_command - let apply_hack_closure (closure : aval) closure_args : aval model_monad = - let typ = Typ.mk_ptr (Typ.mk_struct TextualSil.hack_mixed_type_name) in + let apply_closure lang (closure : aval) closure_args : aval model_monad = + let mixed_type_name = + match (lang : Textual.Lang.t) with + | Hack -> + TextualSil.hack_mixed_type_name + | Python -> + TextualSil.python_mixed_type_name + | Java -> + L.die InternalError "apply_closure is not supported on Java" + in + let typ = Typ.mk_ptr (Typ.mk_struct mixed_type_name) in let args = closure :: closure_args in let unresolved_pname = - Procname.make_hack ~class_name:(Some HackClassName.wildcard) ~function_name:"__invoke" - ~arity:(Some (List.length args)) + match (lang : Textual.Lang.t) with + | Hack -> + Procname.make_hack ~class_name:(Some HackClassName.wildcard) ~function_name:"__invoke" + ~arity:(Some (List.length args)) + | Python -> + Procname.make_python ~class_name:(Some PythonClassName.wildcard) ~function_name:"call" + | Java -> + L.die InternalError "apply_closure is not supported on Java" in let* opt_dynamic_type_data = get_dynamic_type ~ask_specialization:true closure in match opt_dynamic_type_data with diff --git a/infer/src/pulse/PulseModelsDSL.mli b/infer/src/pulse/PulseModelsDSL.mli index 6def12d9a4e..0c30cf0914a 100644 --- a/infer/src/pulse/PulseModelsDSL.mli +++ b/infer/src/pulse/PulseModelsDSL.mli @@ -80,7 +80,7 @@ module Syntax : sig -> ValueOrigin.t ProcnameDispatcher.Call.FuncArg.t list -> unit model_monad - val apply_hack_closure : aval -> aval list -> aval model_monad + val apply_closure : Textual.Lang.t -> aval -> aval list -> aval model_monad val get_data : model_data model_monad diff --git a/infer/src/pulse/PulseModelsHack.ml b/infer/src/pulse/PulseModelsHack.ml index 1e24cb9b180..c646f48e19a 100644 --- a/infer/src/pulse/PulseModelsHack.ml +++ b/infer/src/pulse/PulseModelsHack.ml @@ -183,7 +183,7 @@ module Vec = struct prune_eq_int size_val IntLit.one @@> let* fst_val = load_access arg (FieldAccess fst_field) in - let* mapped_fst_val = apply_hack_closure closure [fst_val] in + let* mapped_fst_val = apply_closure Hack closure [fst_val] in new_vec_dsl [mapped_fst_val] in let size_gt_1_case : DSL.aval DSL.model_monad = @@ -191,8 +191,8 @@ module Vec = struct @@> let* fst_val = load_access arg (FieldAccess fst_field) in let* snd_val = load_access arg (FieldAccess snd_field) in - let* mapped_fst_val = apply_hack_closure closure [fst_val] in - let* mapped_snd_val = apply_hack_closure closure [snd_val] in + let* mapped_fst_val = apply_closure Hack closure [fst_val] in + let* mapped_snd_val = apply_closure Hack closure [snd_val] in new_vec_dsl ~know_size:(Some size_val) [mapped_fst_val; mapped_snd_val] in let* ret = disj [size_eq_0_case; size_eq_1_case; size_gt_1_case] in diff --git a/infer/src/pulse/PulseModelsPython.ml b/infer/src/pulse/PulseModelsPython.ml index 122190b4d4e..a944021df2b 100644 --- a/infer/src/pulse/PulseModelsPython.ml +++ b/infer/src/pulse/PulseModelsPython.ml @@ -6,22 +6,108 @@ *) open! IStd +module F = Format +module L = Logging open PulseBasicInterface open PulseModelsImport module DSL = PulseModelsDSL let dict_tname = TextualSil.python_dict_type_name -let make_dictionnary _args : model = +let none_tname = TextualSil.python_none_type_name + +let sil_fieldname_from_string_value_exn ((address, _) : DSL.aval) : Fieldname.t DSL.model_monad = + let f astate = + match PulseArithmetic.as_constant_string astate address with + | Some str -> + (TextualSil.wildcard_sil_fieldname Python str, astate) + | None -> + L.die InternalError "expecting constant string value" + in + DSL.Syntax.exec_operation f + + +module Dict = struct + let make keys args : DSL.aval DSL.model_monad = + let open DSL.Syntax in + if not (Int.equal (List.length args) (List.length keys)) then + L.die InternalError "Dict.make expects two list of same length@\n" ; + let bindings = List.zip_exn keys args in + let* dict = constructor dict_tname bindings in + ret dict + + + let get dict key : DSL.aval DSL.model_monad = + let open DSL.Syntax in + let* field = sil_fieldname_from_string_value_exn key in + load_access dict (FieldAccess field) + + + let set dict key value : unit DSL.model_monad = + let open DSL.Syntax in + let* field = sil_fieldname_from_string_value_exn key in + let* () = store_field ~ref:dict field value in + ret () +end + +let call closure _arg_names args : model = + (* TODO: take into account arg_names *) + (* TODO: fix the name of the positional arguments *) let open DSL.Syntax in start_model @@ fun () -> + let keys = List.init (List.length args) ~f:(fun i -> F.asprintf "#%d" i) in + let* locals = Dict.make keys args in + let* value = apply_closure Python closure [locals] in + assign_ret value + + +let load_name name locals _globals : model = + let open DSL.Syntax in + start_model + @@ fun () -> + let* value = Dict.get locals name in + (* TODO: decide what we do if the binding is missing in locals *) + assign_ret value + + +let make_function closure _default_values _default_values_kw _annotations _cells_for_closure : model + = + let open DSL.Syntax in + start_model @@ fun () -> assign_ret closure + + +let make_dictionary _args : model = let open DSL.Syntax in - let* dict = constructor dict_tname [] in + start_model + @@ fun () -> + (* TODO: take args into account *) + let* dict = Dict.make [] [] in assign_ret dict +let store_name name locals _globals value : model = + let open DSL.Syntax in + start_model @@ fun () -> Dict.set locals name value + + +let make_none : model = + let open DSL.Syntax in + start_model + @@ fun () -> + let* none = constructor none_tname [] in + assign_ret none + + let matchers : matcher list = let open ProcnameDispatcher.Call in - [-"$builtins" &:: "py_make_dictionnary" &::.*+++> make_dictionnary] + [ -"$builtins" &:: "py_call" <>$ capt_arg_payload $+ capt_arg_payload $+++$--> call + ; -"$builtins" &:: "py_make_dictionary" &::.*+++> make_dictionary + ; -"$builtins" &:: "py_make_function" <>$ capt_arg_payload $+ capt_arg_payload $+ capt_arg_payload + $+ capt_arg_payload $+ capt_arg_payload $--> make_function + ; -"$builtins" &:: "py_load_name" <>$ capt_arg_payload $+ capt_arg_payload $+ capt_arg_payload + $--> load_name + ; -"$builtins" &:: "py_make_none" <>--> make_none + ; -"$builtins" &:: "py_store_name" <>$ capt_arg_payload $+ capt_arg_payload $+ capt_arg_payload + $+ capt_arg_payload $--> store_name ] |> List.map ~f:(ProcnameDispatcher.Call.contramap_arg_payload ~f:ValueOrigin.addr_hist) diff --git a/infer/src/textual/TextualSil.ml b/infer/src/textual/TextualSil.ml index 8e851681a09..06f479bbe39 100644 --- a/infer/src/textual/TextualSil.ml +++ b/infer/src/textual/TextualSil.ml @@ -139,6 +139,8 @@ let python_mixed_type_name = SilTyp.PythonClass (PythonClassName.make "PyObject" let python_dict_type_name = SilTyp.PythonClass (PythonClassName.make "PyDict") +let python_none_type_name = SilTyp.PythonClass (PythonClassName.make "PyNone") + let mk_python_mixed_type_textual loc = Typ.Struct TypeName.{value= "PyObject"; loc} let default_return_type (lang : Lang.t option) loc = diff --git a/infer/src/textual/TextualSil.mli b/infer/src/textual/TextualSil.mli index e3bb137294c..fcaec8cc6bf 100644 --- a/infer/src/textual/TextualSil.mli +++ b/infer/src/textual/TextualSil.mli @@ -54,6 +54,10 @@ val hack_root_type_name : Typ.name val python_dict_type_name : Typ.name +val python_none_type_name : Typ.name + +val python_mixed_type_name : Typ.name + val wildcard_sil_fieldname : Textual.Lang.t -> string -> Fieldname.t val textual_ext : string diff --git a/infer/tests/codetoanalyze/python/pulse/.inferconfig b/infer/tests/codetoanalyze/python/pulse/.inferconfig new file mode 100644 index 00000000000..45dfea4c456 --- /dev/null +++ b/infer/tests/codetoanalyze/python/pulse/.inferconfig @@ -0,0 +1,12 @@ +{ + "scheduler": "callgraph", + "pulse-only": true, + "pulse-specialization-partial": true, + "pulse-taint-short-traces": true, + "pulse-taint-sources": [ + { "procedure": "level0::taint_source" } + ], + "pulse-taint-sinks": [ + { "procedure": "level0::taint_sink" } + ] +} diff --git a/infer/tests/codetoanalyze/python/pulse/.inferconfig.bak b/infer/tests/codetoanalyze/python/pulse/.inferconfig.bak deleted file mode 100644 index c3b8563cbcb..00000000000 --- a/infer/tests/codetoanalyze/python/pulse/.inferconfig.bak +++ /dev/null @@ -1,17 +0,0 @@ -{ - "scheduler": "callgraph", - "pulse-only": true, - "pulse-force-continue": true, - "pulse-taint-sources": [ - { "procedure": "level1.taintSource" }, - { "class_names": ["level2::C1$static"], "method_names": ["taintSource"] }, - { "class_names": ["level3::C10"], "method_names": ["taintSource"] }, - { "class_names": ["level4::C20$static"], "method_names": ["taintSource"] } - ], - "pulse-taint-sinks": [ - { "procedure": "level1.taintSink" }, - { "class_names": ["level2::C2$static"], "method_names": ["taintSink"] }, - { "class_names": ["level3::C11"], "method_names": ["taintSink"] }, - { "class_names": ["level4::C20$static"], "method_names": ["taintSink"] } - ] -} diff --git a/infer/tests/codetoanalyze/python/pulse/issues.exp b/infer/tests/codetoanalyze/python/pulse/issues.exp index e69de29bb2d..8dc3a677e1a 100644 --- a/infer/tests/codetoanalyze/python/pulse/issues.exp +++ b/infer/tests/codetoanalyze/python/pulse/issues.exp @@ -0,0 +1 @@ +level0.py, level0::__module_body__, 14, TAINT_ERROR, no_bucket, ERROR, [in call to `closure:level0:1.call`,source of the taint here: value returned from `level0::taint_source` with kind `Simple`,return from call to `closure:level0:1.call`,when calling `closure:level0:0.call` here,flows to this sink: value passed as argument `#1` to `level0::taint_sink` with kind `Simple`], source: level0::taint_source, sink: level0::taint_sink, tainted expression: UNKNOWN diff --git a/infer/tests/codetoanalyze/python/pulse/level0.py b/infer/tests/codetoanalyze/python/pulse/level0.py new file mode 100644 index 00000000000..08391f1b17b --- /dev/null +++ b/infer/tests/codetoanalyze/python/pulse/level0.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +def taint_sink(x): + pass + +def taint_source(): + pass + +def no_taint_source(): + pass + +taint_sink(taint_source()) + +taint_sink(no_taint_source())