diff --git a/infer/src/python/PyEnv.ml b/infer/src/python/PyEnv.ml index 7c0216a6939..fc917b13f28 100644 --- a/infer/src/python/PyEnv.ml +++ b/infer/src/python/PyEnv.ml @@ -224,6 +224,8 @@ end after [register_class] *) type class_info = {parents: Ident.t list} +type method_info = {signature: Signature.t; default_arguments: T.Exp.t list} + type label_info = {label_name: string; ssa_parameters: T.Typ.t list; prelude: prelude option; processed: bool} @@ -242,8 +244,9 @@ and shared = (** All the builtins that have been called, so we only export them in textual to avoid too much noise *) ; imported_values: Ident.Set.t - ; signatures: Signature.t SMap.t Ident.Map.t - (** Map from module names to the signature of all of their functions/methods *) + ; methods: method_info SMap.t Ident.Map.t + (** Map from module names to information about their methods (signatures, default arguments, + ...) *) ; fields: PyCommon.signature T.TypeName.Map.t (** Map from fully qualified class name to the list of known fields and their types *) ; module_name: Ident.t @@ -370,7 +373,7 @@ let empty module_name = ; classes= SMap.empty ; builtins= Builtin.Set.empty ; imported_values= Ident.Set.empty - ; signatures= Ident.Map.empty + ; methods= Ident.Map.empty ; fields= T.TypeName.Map.empty ; module_name ; params= [] @@ -566,16 +569,18 @@ let register_call env fid = env -let register_method ({shared} as env) ~enclosing_class ~method_name annotations = +let register_method ({shared} as env) ~enclosing_class ~method_name signature default_arguments = PyDebug.p "[register_method] %a.%s\n" Ident.pp enclosing_class method_name ; - PyDebug.p " %a\n" Signature.pp annotations ; - let {signatures} = shared in - let class_info = - Ident.Map.find_opt enclosing_class signatures |> Option.value ~default:SMap.empty + PyDebug.p " %a\n" Signature.pp signature ; + PyDebug.p " %a\n" (Pp.seq ~sep:", " T.Exp.pp) default_arguments ; + let {methods} = shared in + let method_info = + Ident.Map.find_opt enclosing_class 
methods |> Option.value ~default:SMap.empty in - let class_info = SMap.add method_name annotations class_info in - let signatures = Ident.Map.add enclosing_class class_info signatures in - let shared = {shared with signatures} in + let info = {signature; default_arguments} in + let method_info = SMap.add method_name info method_info in + let methods = Ident.Map.add enclosing_class method_info methods in + let shared = {shared with methods} in {env with shared} @@ -601,20 +606,22 @@ let register_fields ({shared} as env) class_name class_fields = {env with shared} -let register_function ({shared} as env) fname loc annotations = +let register_function ({shared} as env) fname loc annotations default_arguments = PyDebug.p "[register_function] %s\n" fname ; let {module_name} = shared in let info = {Signature.is_static= false; is_abstract= false; annotations} in - let env = register_method env ~enclosing_class:module_name ~method_name:fname info in + let env = + register_method env ~enclosing_class:module_name ~method_name:fname info default_arguments + in let key = Ident.mk ~loc fname in let id = Ident.extend ~prefix:module_name fname in let symbol_info = {Symbol.kind= Code; id; loc} in register_symbol env (Symbol.Global key) symbol_info -let lookup_method {shared= {signatures}} ~enclosing_class name = +let lookup_method {shared= {methods}} ~enclosing_class name = let open Option.Let_syntax in - Ident.Map.find_opt enclosing_class signatures >>= SMap.find_opt name + Ident.Map.find_opt enclosing_class methods >>= SMap.find_opt name let lookup_fields {shared= {fields}} class_name = T.TypeName.Map.find_opt class_name fields diff --git a/infer/src/python/PyEnv.mli b/infer/src/python/PyEnv.mli index 4481ba167f4..265659467b7 100644 --- a/infer/src/python/PyEnv.mli +++ b/infer/src/python/PyEnv.mli @@ -128,11 +128,14 @@ module Label : sig (** Process a label [info] and turn it into Textual information *) end -(** Class Level info. 
For now, only the parent info (if present) is tracked, supporting multiple +(** Class level info. For now, only the parent info (if present) is tracked, supporting multiple inheritance. We may track more information in the future, like being an abstract class, a dataclass, ... *) type class_info = {parents: Ident.t list} +(** Method level info. We store a method/function signature, and its default arguments *) +type method_info = {signature: Signature.t; default_arguments: T.Exp.t list} + val empty : Ident.t -> t val loc : t -> T.Location.t @@ -221,11 +224,12 @@ val mk_builtin_call : t -> PyBuiltin.textual -> T.Exp.t list -> t * T.Ident.t * (** Wrapper to compute the Textual version of a call to a "textual" builtin * function (a builtin we introduced for modeling purpose) *) -val register_function : t -> string -> T.Location.t -> PyCommon.signature -> t +val register_function : t -> string -> T.Location.t -> PyCommon.signature -> T.Exp.t list -> t (** Register a function declaration. We keep track of them since they might shadow Python builtins or previous definitions *) -val register_method : t -> enclosing_class:Ident.t -> method_name:string -> Signature.t -> t +val register_method : + t -> enclosing_class:Ident.t -> method_name:string -> Signature.t -> T.Exp.t list -> t (** Register a method declaration. 
We mostly keep track of their signatures *) val register_fields : t -> T.TypeName.t -> PyCommon.signature -> t @@ -233,7 +237,7 @@ val register_fields : t -> T.TypeName.t -> PyCommon.signature -> t calls to this function with the same class name in a best effort attempt: Python is dynamic, and any [self.foo] access could give rise to such a registration *) -val lookup_method : t -> enclosing_class:Ident.t -> string -> Signature.t option +val lookup_method : t -> enclosing_class:Ident.t -> string -> method_info option (** Lookup the information stored for a function/method in the relevant [enclosing_class] *) val lookup_fields : t -> T.TypeName.t -> PyCommon.signature option diff --git a/infer/src/python/PyTrans.ml b/infer/src/python/PyTrans.ml index 9d0737818ce..3afa5c85b59 100644 --- a/infer/src/python/PyTrans.ml +++ b/infer/src/python/PyTrans.ml @@ -125,6 +125,7 @@ module Error = struct | CallKeywordBuildClass | RaiseExceptionInvalid of int | RaiseExceptionUnknown of DataStack.cell + | DefaultArgSpecialization of T.qualified_procname * int * int (** procedure name, number of parameters, number of default arguments *) type t = L.error * kind @@ -223,6 +224,9 @@ module Error = struct F.fprintf fmt "RAISE_VARARGS invalid mode %d" n | RaiseExceptionUnknown cell -> F.fprintf fmt "RAISE_VARARGS unknown construct %a" DataStack.pp_cell cell + | DefaultArgSpecialization (name, param_size, default_size) -> + F.fprintf fmt "%a has more default arguments (%d) than actual arguments (%d)" + T.pp_qualified_procname name default_size param_size let class_decl (err, kind) = (err, ClassDecl kind) @@ -885,6 +889,10 @@ module FUNCTION = struct annotations + (* TODO: we don't support correctly keeping track of non constant default arguments. + For example if the value is a global constant, we'll store + its loaded reference `n` which won't make sense in the specialized + function. 
*) let unpack_defaults env code defaults = let open IResult.Let_syntax in match (defaults : DataStack.cell) with @@ -937,14 +945,12 @@ module FUNCTION = struct let* annotations = Option.value_map ~default:(Ok []) ~f:(unpack_annotations env code) annotations in - let* env = - if MakeFunctionFlags.mem flags DefaultValues then ( + let* env, default_arguments = + if MakeFunctionFlags.mem flags DefaultValues then let* env, cell = pop_datastack opname env in - let* env, _defaults = unpack_defaults env code cell in - L.debug Capture Quiet - "[MAKE_FUNCTION] TODO generate overriding functions with default args inlined@\n" ; - Ok env ) - else Ok env + let* env, defaults = unpack_defaults env code cell in + Ok (env, defaults) + else Ok (env, []) in let* code = match DataStack.as_code code body with @@ -982,7 +988,7 @@ module FUNCTION = struct if FFI.Code.is_closure code then L.user_warning "%s: support for closures is incomplete (%s)@\n" opname code_name ; let loc = Env.loc env in - let env = Env.register_function env code_name loc annotations in + let env = Env.register_function env code_name loc annotations default_arguments in let env = Env.push env (DataStack.Code {fun_or_class= true; code_name; code}) in Ok (env, None) end @@ -2731,24 +2737,99 @@ let rec nodes env label_info code instructions = Ok (env, textual_node :: more_textual_nodes) +(** Given a function signature and some default arguments, this function will generate a specialized + version with some formal arguments removed, replaced by their default value. 
*) +let specialize_proc_decl qualified_name formals_types result_type attributes params + default_arguments = + let open IResult.Let_syntax in + let param_size = List.length params in + let default_size = List.length default_arguments in + let* () = (* error payload order is (name, param_size, default_size), as the error printer destructures it *) + if param_size < default_size then + Error + (L.ExternalError, Error.DefaultArgSpecialization (qualified_name, param_size, default_size)) + else Ok () + in + let n = param_size - default_size in + let params, locals = List.split_n params n in + let formals_types, default_types = List.split_n formals_types n in + let typed_params = List.zip_exn params formals_types in + let locals = List.zip_exn locals default_types in + let defaults = List.zip_exn locals default_arguments in + let procdecl = + {T.ProcDecl.qualified_name; formals_types= Some formals_types; result_type; attributes} + in + let stores = + List.map defaults ~f:(fun ((var, {T.Typ.typ}), exp) -> + T.Instr.Store {exp1= T.Exp.Lvar var; typ= Some typ; exp2= exp; loc= Unknown} ) + in + let id = T.Ident.of_int 0 in + let args = + List.map + ~f:(fun (var, {T.Typ.typ}) -> T.Exp.Load {exp= T.Exp.Lvar var; typ= Some typ}) + typed_params + in + let local_args = + List.map ~f:(fun (var, {T.Typ.typ}) -> T.Exp.Load {exp= T.Exp.Lvar var; typ= Some typ}) locals + in + let exp = T.Exp.call_non_virtual qualified_name (args @ local_args) in + let instrs = stores @ [T.Instr.Let {id; exp; loc= Unknown}] in + let label = {T.NodeName.value= "b0"; loc= Unknown} in + let node = + { T.Node.label + ; ssa_parameters= [] + ; exn_succs= [] + ; last= T.Terminator.Ret (T.Exp.Var id) + ; instrs + ; last_loc= Unknown + ; label_loc= Unknown } + in + Ok {T.ProcDesc.procdecl; nodes= [node]; start= label; params; locals; exit_loc= Unknown} + + +let generate_specialized_proc_decl qualified_name formals_types result_type attributes params + default_arguments = + let open IResult.Let_syntax in + if List.is_empty default_arguments then Ok [] + else + let f = specialize_proc_decl qualified_name 
formals_types result_type attributes params in + let rec fold default_arguments = + match default_arguments with + | [] -> + Ok [] + | _ :: tl -> + let* decl = f default_arguments in + let* decls = fold tl in + Ok (T.Module.Proc decl :: decls) + in + fold default_arguments + + (** Process a single code unit (toplevel code, function body, ...) *) let to_proc_desc env loc enclosing_class_name opt_name ({FFI.Code.instructions} as code) = let open IResult.Let_syntax in Debug.p "[to_proc_desc] %a %a\n" Ident.pp enclosing_class_name (Pp.option F.pp_print_string) opt_name ; - let is_toplevel, is_static, is_abstract, name, annotations = + let default annotations = + let signature = {Env.Signature.is_static= false; is_abstract= false; annotations} in + {Env.signature; default_arguments= []} + in + let is_toplevel, name, method_info = match opt_name with | None -> let return_typ = {PyCommon.name= PyCommon.return; annotation= Ident.mk "None"} in - (true, false, false, PyCommon.toplevel_function, [return_typ]) + let method_info = default [return_typ] in + (true, PyCommon.toplevel_function, method_info) | Some name -> ( - let signature = Env.lookup_method env ~enclosing_class:enclosing_class_name name in - match signature with + let method_info = Env.lookup_method env ~enclosing_class:enclosing_class_name name in + match method_info with | None -> - (false, false, false, name, []) - | Some {Env.Signature.is_static; is_abstract; annotations} -> - (false, is_static, is_abstract, name, annotations) ) + (false, name, default []) + | Some method_info -> + (false, name, method_info) ) in + let {Env.signature; default_arguments} = method_info in + let {Env.Signature.is_static; is_abstract; annotations} = signature in let proc_name = proc_name ~loc name in let enclosing_class = Ident.to_type_name ~static:is_static enclosing_class_name in let qualified_name = qualified_procname ~enclosing_class proc_name in @@ -2795,8 +2876,12 @@ let to_proc_desc env loc enclosing_class_name opt_name 
({FFI.Code.instructions} let procdecl = {T.ProcDecl.qualified_name; formals_types= Some formals_types; result_type; attributes= []} in - if is_abstract then Ok (env, T.Module.Procdecl procdecl) + if is_abstract then Ok (env, [T.Module.Procdecl procdecl]) else + let* specialized_decls = + generate_specialized_proc_decl qualified_name formals_types result_type [] params + default_arguments + in let env, entry_label = Env.mk_fresh_label env in let label = node_name ~loc entry_label in let label_info = Env.Label.mk entry_label in @@ -2804,7 +2889,7 @@ let to_proc_desc env loc enclosing_class_name opt_name ({FFI.Code.instructions} Ok ( env , T.Module.Proc {T.ProcDesc.procdecl; nodes; start= label; params; locals; exit_loc= Unknown} - ) + :: specialized_decls ) (* For each class declaration, we generate an explicit constructor if there @@ -2939,6 +3024,7 @@ let rec class_declaration env module_name ({FFI.Code.instructions; co_name} as c (* TODO: Fix class method parsing to deal with default parameters. 
It will require a big rewrite of the whole thing, so postponing to a later diff *) + let default_arguments = [] in let* () = check_flags opname flags in let env = match FFI.Constant.as_code code with @@ -2946,7 +3032,7 @@ let rec class_declaration env module_name ({FFI.Code.instructions; co_name} as c let annotations = List.filter_map ~f:(lift_annotation env) signature in let info = {PyEnv.Signature.is_static; is_abstract; annotations} in Env.register_method env ~enclosing_class:class_name ~method_name:code.FFI.Code.co_name - info + info default_arguments | None -> env in @@ -3040,8 +3126,10 @@ and to_proc_descs env enclosing_class_id codes = let* env, new_decls = class_declaration env enclosing_class_id code loc parents in Ok (env, new_decls @ decls) | None -> - let* env, decl = to_proc_desc env loc enclosing_class_id (Some co_name) code in - Ok (env, decl :: decls) ) ) + let* env, specialized_decls = + to_proc_desc env loc enclosing_class_id (Some co_name) code + in + Ok (env, specialized_decls @ decls) ) ) let python_attribute = T.Attr.mk_source_language T.Lang.Python @@ -3095,7 +3183,7 @@ let to_module ~sourcefile ({FFI.Code.co_consts; co_name; co_filename; instructio this would print `10` and then `"cat"`. We should investigate if suche code exists, and in which quantity, to see if it is worth finding a solution for it. 
*) - let* env, decl = to_proc_desc env loc module_name None code in + let* env, specialized_decls = to_proc_desc env loc module_name None code in (* Translate globals to Textual *) let globals = Ident.Map.fold @@ -3128,7 +3216,7 @@ let to_module ~sourcefile ({FFI.Code.co_consts; co_name; co_filename; instructio in (* Gather everything into a Textual module *) let decls = - ((decl :: decls) @ globals @ imports @ python_implicit_names) + ((specialized_decls @ decls) @ globals @ imports @ python_implicit_names) @ Builtin.Set.to_textual (Env.get_used_builtins env) in Ok {T.Module.attrs= [python_attribute]; decls; sourcefile} diff --git a/infer/src/python/unit/PyTransTest.ml b/infer/src/python/unit/PyTransTest.ml index 82c7cc5a5fe..3c22427a554 100644 --- a/infer/src/python/unit/PyTransTest.ml +++ b/infer/src/python/unit/PyTransTest.ml @@ -3385,13 +3385,15 @@ let%test_module "default_arguments" = class C: pass -c = C() - -def f(x: int, y=c, z=C()): +# TODO: we only support simple types as default arguments. 
+# We might add support for objects/instances if need be, in the future +def f(x, y=1, z=2, s="zuck"): pass -def g(x, y=1, z=2): - pass +f(0) +f(10, 100) +f(100, 1000, 0) +f(0, 0, 0, "toto") |} in test source ; @@ -3402,12 +3404,11 @@ def g(x, y=1, z=2): define dummy.$toplevel() : *PyNone { #b0: n0 = $builtins.python_class("dummy::C") - n1 = dummy::C() - store &dummy::c <- n1:*dummy::C - n2 = dummy::C() - n3:*dummy::C = load &dummy::c - n4 = $builtins.python_code("dummy.f") - n5 = $builtins.python_code("dummy.g") + n1 = $builtins.python_code("dummy.f") + n2 = dummy.f($builtins.python_int(0)) + n3 = dummy.f($builtins.python_int(10), $builtins.python_int(100)) + n4 = dummy.f($builtins.python_int(100), $builtins.python_int(1000), $builtins.python_int(0)) + n5 = dummy.f($builtins.python_int(0), $builtins.python_int(0), $builtins.python_int(0), $builtins.python_string("toto")) ret null } @@ -3422,19 +3423,41 @@ def g(x, y=1, z=2): type dummy::C = {} - define dummy.f(x: *PyInt, y: *PyObject, z: *PyObject) : *PyObject { + define dummy.f(x: *PyObject, y: *PyObject, z: *PyObject) : *PyObject { + local s: *PyObject #b0: - ret null + store &s <- $builtins.python_string("zuck"):*PyObject + n0 = dummy.f([&x:*PyObject], [&y:*PyObject], [&z:*PyObject], [&s:*PyObject]) + ret n0 } - define dummy.g(x: *PyObject, y: *PyObject, z: *PyObject) : *PyObject { + define dummy.f(x: *PyObject, y: *PyObject) : *PyObject { + local z: *PyObject, s: *PyObject #b0: - ret null + store &z <- $builtins.python_int(2):*PyObject + store &s <- $builtins.python_string("zuck"):*PyObject + n0 = dummy.f([&x:*PyObject], [&y:*PyObject], [&z:*PyObject], [&s:*PyObject]) + ret n0 } - global dummy::c: *PyObject + define dummy.f(x: *PyObject) : *PyObject { + local y: *PyObject, z: *PyObject, s: *PyObject + #b0: + store &y <- $builtins.python_int(1):*PyObject + store &z <- $builtins.python_int(2):*PyObject + store &s <- $builtins.python_string("zuck"):*PyObject + n0 = dummy.f([&x:*PyObject], [&y:*PyObject], 
[&z:*PyObject], [&s:*PyObject]) + ret n0 + + } + + define dummy.f(x: *PyObject, y: *PyObject, z: *PyObject, s: *PyObject) : *PyObject { + #b0: + ret null + + } global $python_implicit_names::__name__: *PyString @@ -3454,8 +3477,5 @@ def g(x, y=1, z=2): declare $builtins.python_float(float) : *PyFloat - declare $builtins.python_int(int) : *PyInt - - [MAKE_FUNCTION] TODO generate overriding functions with default args inlined - [MAKE_FUNCTION] TODO generate overriding functions with default args inlined |}] + declare $builtins.python_int(int) : *PyInt |}] end )