From e7341f7d60f0475ea5a1c2be0aa99a16cf2f7ab5 Mon Sep 17 00:00:00 2001 From: David Pichardie Date: Mon, 9 Dec 2024 05:55:03 -0800 Subject: [PATCH] [inferpython] attach a dynamic type to each module object Summary: we need to track more precisely each module object. We will be help with static types because the type is final, so dynamic=static. Reviewed By: jvillard Differential Revision: D66700538 Privacy Context Container: L1208441 fbshipit-source-id: 725b019575454fabcd43b65895056d87c17ab4dd --- infer/src/IR/PythonClassName.ml | 15 +-- infer/src/IR/PythonClassName.mli | 18 ++-- infer/src/IR/Typ.ml | 12 +++ infer/src/IR/Typ.mli | 6 ++ infer/src/pulse/PulseAbductiveDomain.ml | 3 +- infer/src/pulse/PulseModelsPython.ml | 101 ++++++++---------- infer/src/textual/TextualSil.ml | 6 -- infer/src/textual/TextualSil.mli | 4 - .../python/pulse/async_level0.py | 6 +- .../codetoanalyze/python/pulse/issues.exp | 3 +- .../python/pulse/taint_import_simple.py | 4 +- 11 files changed, 82 insertions(+), 96 deletions(-) diff --git a/infer/src/IR/PythonClassName.ml b/infer/src/IR/PythonClassName.ml index 05a7b4d6895..244f43deea0 100644 --- a/infer/src/IR/PythonClassName.ml +++ b/infer/src/IR/PythonClassName.ml @@ -26,17 +26,10 @@ let pp fmt {classname} = F.fprintf fmt "%s" classname let to_string = Pp.string_of_pp pp -let static_suffix = "$static" +let globals_prefix = "PyGlobals::" -let len_static_suffix = String.length static_suffix +let is_module {classname} = String.is_prefix classname ~prefix:globals_prefix -let static_companion {classname} = {classname= classname ^ static_suffix} +let get_module_name {classname} = String.chop_prefix classname ~prefix:globals_prefix -let is_static {classname} = StringLabels.ends_with ~suffix:static_suffix classname - -let static_companion_origin ({classname} as name) = - let len_classname = String.length classname in - if len_classname > len_static_suffix then - let len = len_classname - len_static_suffix in - {classname= StringLabels.sub ~pos:0 ~len classname} - else name +let is_final name = is_module name diff --git a/infer/src/IR/PythonClassName.mli b/infer/src/IR/PythonClassName.mli index f78d04f175e..3ec252e200a 100644 --- a/infer/src/IR/PythonClassName.mli +++ b/infer/src/IR/PythonClassName.mli @@ -16,21 +16,17 @@ val classname : t -> string val components : t -> string list -val wildcard : t [@@warning "-unused-value-declaration"] +val wildcard : t val pp : F.formatter -> t -> unit val to_string : t -> string -val static_companion : t -> t -[@@warning "-unused-value-declaration"] -(** return the class of the companion class object of this class eg: Foo -> Foo$static *) +val is_final : t -> bool -val static_companion_origin : t -> t -[@@warning "-unused-value-declaration"] -(** return the origin class of a companion class object eg: Foo$static -> Foo. the result is not - specified if is the name is not a valid static class name *) +val is_module : t -> bool -val is_static : t -> bool -[@@warning "-unused-value-declaration"] -(** tests if the name is a valid static class name (ie. ends with "$static") *) +val get_module_name : t -> string option +(** will return the string representation of the module iff type name is a module type *) + +val globals_prefix : string diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 8e5e2fed5b8..601701754d7 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -595,6 +595,18 @@ module Name = struct let is_python_class name = match name with PythonClass _ -> true | _ -> false + let is_python_final name = + match name with PythonClass py -> PythonClassName.is_final py | _ -> false + + + let is_python_module name = + match name with PythonClass py -> PythonClassName.is_module py | _ -> false + + + let get_python_module_name name = + match name with PythonClass py -> PythonClassName.get_module_name py | _ -> None + + let is_same_type t1 t2 = match (t1, t2) with | CStruct _, CStruct _ diff --git a/infer/src/IR/Typ.mli b/infer/src/IR/Typ.mli index 89fa3f05fad..9a10d497d09 100644 --- a/infer/src/IR/Typ.mli +++ b/infer/src/IR/Typ.mli @@ -204,6 +204,12 @@ module Name : sig val is_python_class : t -> bool + val is_python_final : t -> bool + + val is_python_module : t -> bool + + val get_python_module_name : t -> string option + module C : sig val from_string : string -> t diff --git a/infer/src/pulse/PulseAbductiveDomain.ml b/infer/src/pulse/PulseAbductiveDomain.ml index 802c647a185..9347b4ab5bc 100644 --- a/infer/src/pulse/PulseAbductiveDomain.ml +++ b/infer/src/pulse/PulseAbductiveDomain.ml @@ -775,13 +775,14 @@ module Internal = struct let is_final = Tenv.lookup tenv typ_name |> Option.value_map ~default:false ~f:(fun {Struct.annots} -> Annot.Item.is_final annots) + || Typ.Name.is_python_final typ_name in if is_final then let phi' = PulseFormula.add_dynamic_type_unsafe (downcast addr) (Typ.mk_struct typ_name) location astate.path_condition in - set_path_condition phi' astate + set_path_condition phi' astate |> SafeAttributes.add_static_type typ_name addr else SafeAttributes.add_static_type typ_name addr astate diff --git a/infer/src/pulse/PulseModelsPython.ml b/infer/src/pulse/PulseModelsPython.ml index b85e9d6f6cd..fa067f156a1 100644 --- a/infer/src/pulse/PulseModelsPython.ml +++ b/infer/src/pulse/PulseModelsPython.ml @@ -22,9 +22,10 @@ let none_tname = TextualSil.python_none_type_name let tuple_tname = TextualSil.python_tuple_type_name -let unresolved_module_tname = TextualSil.python_unresolved_module_type_name +let module_tname module_name = + let str = F.asprintf "%s%s" PythonClassName.globals_prefix module_name in + Typ.PythonClass (PythonClassName.make str) -let resolved_module_tname = TextualSil.python_resolved_module_type_name let sil_fieldname_from_string_value_exn type_name ((address, _) : DSL.aval) : Fieldname.t DSL.model_monad = @@ -100,23 +101,10 @@ module Tuple = struct load_access tuple (FieldAccess field) end -module UnresolvedModule = struct +module PyModule = struct let make name : DSL.aval DSL.model_monad = let open DSL.Syntax in - constructor unresolved_module_tname [("name", name)] - - - let name module_ : string option DSL.model_monad = - let open DSL.Syntax in - let field = Fieldname.make unresolved_module_tname "name" in - let* aval = load_access module_ (FieldAccess field) in - as_constant_string aval -end - -module ResolvedModule = struct - let make : DSL.aval DSL.model_monad = - let open DSL.Syntax in - constructor resolved_module_tname [] + constructor (module_tname name) [] end let build_tuple args : model = @@ -184,47 +172,52 @@ let await_awaitable arg : unit DSL.model_monad = fst arg |> AddressAttributes.await_awaitable |> DSL.Syntax.exec_command +(* Only Python frontend builtins ($builtins.py_) have a C-style syntax, so we + must catch other specific calls here *) +let modelled_python_call module_name fun_name args : DSL.aval option DSL.model_monad = + let open DSL.Syntax in + match (module_name, fun_name, args) with + | "asyncio", Some "run", [arg] -> + let* () = await_awaitable arg in + let* res = fresh () in + ret (Some res) + | "asyncio", Some "sleep", _ -> + let* res = fresh () in + let* () = allocation Attribute.Awaitable res in + ret (Some res) + | _, _, _ -> + ret None + + let call_method globals name obj arg_names args : model = (* TODO: take into account named args *) let open DSL.Syntax in start_model @@ fun () -> + let* opt_dynamic_type_data = get_dynamic_type ~ask_specialization:true obj in let* res = - dynamic_dispatch obj - ~cases: - [ ( unresolved_module_tname - , fun () -> - let* opt_module_name = UnresolvedModule.name obj in - let* opt_method_name = as_constant_string name in - let* special, res = - match (opt_module_name, opt_method_name, args) with - | Some "asyncio", Some "run", [arg] -> - let* () = await_awaitable arg in - let* res = fresh () in - ret (true, res) - | Some "asyncio", Some "sleep", _ -> - let* res = fresh () in - let* () = allocation Attribute.Awaitable res in - ret (true, res) - | _, _, _ -> - let* res = fresh () in - ret (false, res) - in - if special then - L.d_printfln "special method call (%a, %a)" (Pp.option F.pp_print_string) - opt_module_name (Pp.option F.pp_print_string) opt_method_name - else - L.d_printfln "unknown special method call (%a, %a)" (Pp.option F.pp_print_string) - opt_module_name (Pp.option F.pp_print_string) opt_method_name ; - ret res ) - ; ( resolved_module_tname - , fun () -> - let* closure = Dict.get obj name in - call_dsl ~closure ~globals:obj ~arg_names ~args ) ] - ~default:(fun () -> + match opt_dynamic_type_data with + | Some {Formula.typ= {Typ.desc= Tstruct type_name}} when Typ.Name.is_python_module type_name + -> ( + (* since module types are final, static type will save us most of the time *) + let module_name = Typ.Name.get_python_module_name type_name |> Option.value_exn in + let* opt_str_name = as_constant_string name in + let* opt_special_call = modelled_python_call module_name opt_str_name args in + match opt_special_call with + | None -> + L.d_printfln "calling method %a on module object %s" (Pp.option F.pp_print_string) + opt_str_name module_name ; + let* closure = Dict.get obj name in + call_dsl ~closure ~globals:obj ~arg_names ~args + | Some res -> + L.d_printfln "catching special call %a on module object %s" + (Pp.option F.pp_print_string) opt_str_name module_name ; + ret res ) + | _ -> let* closure = Dict.get obj name in (* TODO: for OO method, gives self argument *) - call_dsl ~closure ~globals ~arg_names ~args ) + (* this is incorrect for a module *) + call_dsl ~closure ~globals ~arg_names ~args in assign_ret res @@ -262,13 +255,9 @@ let import_name name _fromlist _level : model = let class_name = PythonClassName.make module_name in let function_name = "__module_body__" in let proc_name = Procname.make_python ~class_name:(Some class_name) ~function_name in - if IRAttributes.load proc_name |> Option.is_none then ( - L.d_printfln "module %s is unresolved" module_name ; - let* module_ = UnresolvedModule.make name in - assign_ret module_ ) + let* module_ = PyModule.make module_name in + if IRAttributes.load proc_name |> Option.is_none then assign_ret module_ else - let* module_ = ResolvedModule.make in - (* TODO: call it only once! *) let* _ = python_call proc_name [("globals", module_)] in assign_ret module_ diff --git a/infer/src/textual/TextualSil.ml b/infer/src/textual/TextualSil.ml index 3e276970c29..8eb146bc118 100644 --- a/infer/src/textual/TextualSil.ml +++ b/infer/src/textual/TextualSil.ml @@ -145,12 +145,6 @@ let python_none_type_name = SilTyp.PythonClass (PythonClassName.make "PyNone") let python_tuple_type_name = SilTyp.PythonClass (PythonClassName.make "PyTuple") -let python_unresolved_module_type_name = - SilTyp.PythonClass (PythonClassName.make "PyUnresolvedModule") - - -let python_resolved_module_type_name = SilTyp.PythonClass (PythonClassName.make "PyResolvedModule") - let mk_python_mixed_type_textual loc = Typ.Struct TypeName.{value= "PyObject"; loc} let default_return_type (lang : Lang.t option) loc = diff --git a/infer/src/textual/TextualSil.mli b/infer/src/textual/TextualSil.mli index 540389a6219..5bd9036ab76 100644 --- a/infer/src/textual/TextualSil.mli +++ b/infer/src/textual/TextualSil.mli @@ -65,10 +65,6 @@ val python_mixed_type_name : Typ.name val python_tuple_type_name : Typ.name -val python_unresolved_module_type_name : Typ.name - -val python_resolved_module_type_name : Typ.name - val wildcard_sil_fieldname : Textual.Lang.t -> string -> Fieldname.t val textual_ext : string diff --git a/infer/tests/codetoanalyze/python/pulse/async_level0.py b/infer/tests/codetoanalyze/python/pulse/async_level0.py index 441bc029a0c..f57723fb592 100644 --- a/infer/tests/codetoanalyze/python/pulse/async_level0.py +++ b/infer/tests/codetoanalyze/python/pulse/async_level0.py @@ -10,7 +10,7 @@ async def sleep(i): await asyncio.sleep(i) -async def FN_sleep_bad(i): +async def sleep_bad(i): asyncio.sleep(i) async def call_sleep_ok(): @@ -49,10 +49,10 @@ async def call_sleep_with_branchs_bad2(b): return await temp -def FP_main_ok(): +def main_ok(): asyncio.run(sleep(10)) -FP_main_ok() +main_ok() # no FP here asyncio.run(sleep(10)) diff --git a/infer/tests/codetoanalyze/python/pulse/issues.exp b/infer/tests/codetoanalyze/python/pulse/issues.exp index 91486c67fd8..77723f23525 100644 --- a/infer/tests/codetoanalyze/python/pulse/issues.exp +++ b/infer/tests/codetoanalyze/python/pulse/issues.exp @@ -1,9 +1,8 @@ +async_level0.py, async_level0.sleep_bad, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here] async_level0.py, async_level0.call_sleep_bad, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here] async_level0.py, async_level0.call_sleep_with_branchs_bad1, 3, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here] async_level0.py, async_level0.call_sleep_with_branchs_bad2, 4, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here] -async_level0.py, async_level0.FP_main_ok, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here] async_level1.py, async_level1.D::main, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here] -taint_import_simple.py, taint_import_simple.basic_flow_bad, 2, TAINT_ERROR, no_bucket, ERROR, [in call to `closure:taint:1.call`,source of the taint here: value returned from `taint.source` with kind `Simple`,return from call to `closure:taint:1.call`,when calling `closure:taint:0.call` here,flows to this sink: value passed as argument `#1` to `taint.sink` with kind `Simple`], source: taint.source, sink: taint.sink, tainted expression: UNKNOWN taint_import_with_package.py, taint_import_with_package.__module_body__, 62, TAINT_ERROR, no_bucket, ERROR, [in call to `dir1::testmod.__module_body__`,in call to `closure:taint:1.call`,source of the taint here: value returned from `taint.source` with kind `Simple`,return from call to `closure:taint:1.call`,return from call to `dir1::testmod.__module_body__`,when calling `closure:taint_import_with_package:0.call` here,when calling `taint_import_with_package.use1_bad` here,when calling `closure:taint:0.call` here,flows to this sink: value passed as argument `#1` to `taint.sink` with kind `Simple`], source: taint.source, sink: taint.sink, tainted expression: globals->tainted_global1 taint_import_with_package.py, taint_import_with_package.__module_body__, 68, TAINT_ERROR, no_bucket, ERROR, [in call to `dir2::testmod.__module_body__`,in call to `closure:taint:1.call`,source of the taint here: value returned from `taint.source` with kind `Simple`,return from call to `closure:taint:1.call`,return from call to `dir2::testmod.__module_body__`,when calling `closure:taint_import_with_package:2.call` here,when calling `taint_import_with_package.use2_bad` here,when calling `closure:taint:0.call` here,flows to this sink: value passed as argument `#1` to `taint.sink` with kind `Simple`], source: taint.source, sink: taint.sink, tainted expression: globals->tainted_global2 taint_import_with_package.py, taint_import_with_package.__module_body__, 74, TAINT_ERROR, no_bucket, ERROR, [in call to `dir1::dir3::testmod.__module_body__`,in call to `closure:taint:1.call`,source of the taint here: value returned from `taint.source` with kind `Simple`,return from call to `closure:taint:1.call`,return from call to `dir1::dir3::testmod.__module_body__`,when calling `closure:taint_import_with_package:4.call` here,when calling `taint_import_with_package.use3_bad` here,when calling `closure:taint:0.call` here,flows to this sink: value passed as argument `#1` to `taint.sink` with kind `Simple`], source: taint.source, sink: taint.sink, tainted expression: globals->tainted_global3 diff --git a/infer/tests/codetoanalyze/python/pulse/taint_import_simple.py b/infer/tests/codetoanalyze/python/pulse/taint_import_simple.py index 7e9f5f6a1b5..80a78065616 100644 --- a/infer/tests/codetoanalyze/python/pulse/taint_import_simple.py +++ b/infer/tests/codetoanalyze/python/pulse/taint_import_simple.py @@ -6,12 +6,12 @@ import taint from taint import sink -def basic_flow_bad() -> None: +def FN_basic_flow_bad() -> None: tainted = taint.source() sink(tainted) def basic_flow_ok(untainted: int) -> None: sink(untainted) -basic_flow_bad() +FN_basic_flow_bad() basic_flow_ok(0)