From 476dd6a62fc8bc9566b1b9b988143fbac047e4b0 Mon Sep 17 00:00:00 2001 From: Vincent Siles Date: Tue, 26 Sep 2023 05:10:51 -0700 Subject: [PATCH] [infer/py] Improve BUILD_CONST_KEY_MAP support and add BUILD_MAP Summary: BUILD_CONST_KEY_MAP is a specialized version of BUILD_MAP. This diff adds support for the latter, refactoring the former to share some structure. It also gets rid of a TODO: we can now load maps! Reviewed By: ngorogiannis Differential Revision: D49457576 fbshipit-source-id: d56020120ee592b81d510fa962821ba7c738a5f7 --- infer/src/python/PyBuiltin.ml | 44 ++++---- infer/src/python/PyBuiltin.mli | 6 +- infer/src/python/PyCommon.ml | 10 ++ infer/src/python/PyCommon.mli | 6 ++ infer/src/python/PyEnv.ml | 2 +- infer/src/python/PyEnv.mli | 2 +- infer/src/python/PyTrans.ml | 155 +++++++++++++++------------ infer/src/python/unit/PyTransTest.ml | 59 ++++++++++ 8 files changed, 193 insertions(+), 91 deletions(-) diff --git a/infer/src/python/PyBuiltin.ml b/infer/src/python/PyBuiltin.ml index d946713dae8..d626c6c9f07 100644 --- a/infer/src/python/PyBuiltin.ml +++ b/infer/src/python/PyBuiltin.ml @@ -92,6 +92,19 @@ module Builtin = struct "xor" + type collection = List | Set | Tuple | Map [@@deriving compare] + + let collection_to_string = function + | List -> + "list" + | Set -> + "set" + | Tuple -> + "tuple" + | Map -> + "map" + + type textual = | IsTrue | Binary of binary_op @@ -104,9 +117,7 @@ module Builtin = struct | PythonCode | PythonIter | PythonIterNext - | PythonBuildList - | PythonBuildSet - | PythonBuildTuple + | PythonBuild of collection | PythonIndex | PythonSubscriptGet | PythonSubscriptSet @@ -165,12 +176,8 @@ let to_proc_name = function "python_iter" | PythonIterNext -> "python_iter_next" - | PythonBuildList -> - "python_build_list" - | PythonBuildSet -> - "python_build_set" - | PythonBuildTuple -> - "python_build_tuple" + | PythonBuild builder -> + sprintf "python_build_%s" (collection_to_string builder) | PythonIndex -> "python_index" | 
PythonSubscriptGet -> @@ -269,6 +276,9 @@ module Set = struct ; result_type= annotatedObject ; used_struct_types= [] } ) in + let no_formal ?(result_type = annotatedObject) op = + (op, {formals_types= None; result_type; used_struct_types= []}) + in let builtins = [ ( Builtin.IsTrue , { formals_types= Some [annotatedObject] @@ -300,10 +310,8 @@ module Set = struct ; binary_op (Builtin.Inplace Subtract) ; binary_op (Builtin.Inplace TrueDivide) ; binary_op (Builtin.Inplace Xor) - ; ( Builtin.PythonCall - , {formals_types= None; result_type= annotatedObject; used_struct_types= []} ) - ; ( Builtin.PythonCallKW - , {formals_types= None; result_type= annotatedObject; used_struct_types= []} ) + ; no_formal Builtin.PythonCall + ; no_formal Builtin.PythonCallKW ; ( Builtin.PythonKWArg , { formals_types= Some [annot string_; annotatedObject] ; result_type= annotatedObject @@ -325,12 +333,10 @@ module Set = struct , { formals_types= Some [annotatedObject] ; result_type= annot PyCommon.pyIterItem ; used_struct_types= [PyCommon.pyIterItemStruct] } ) - ; ( Builtin.PythonBuildList - , {formals_types= None; result_type= annot PyCommon.pyList; used_struct_types= []} ) - ; ( Builtin.PythonBuildSet - , {formals_types= None; result_type= annot PyCommon.pySet; used_struct_types= []} ) - ; ( Builtin.PythonBuildTuple - , {formals_types= None; result_type= annot PyCommon.pyTuple; used_struct_types= []} ) + ; no_formal (Builtin.PythonBuild List) ~result_type:(annot PyCommon.pyList) + ; no_formal (Builtin.PythonBuild Set) ~result_type:(annot PyCommon.pySet) + ; no_formal (Builtin.PythonBuild Tuple) ~result_type:(annot PyCommon.pyTuple) + ; no_formal (Builtin.PythonBuild Map) ~result_type:(annot PyCommon.pyMap) ; ( Builtin.PythonIndex , { formals_types= Some [annot PyCommon.pyObject; annot T.Typ.Int] ; result_type= annot PyCommon.pyObject diff --git a/infer/src/python/PyBuiltin.mli b/infer/src/python/PyBuiltin.mli index 2f4252e6a74..96db3a41af0 100644 --- a/infer/src/python/PyBuiltin.mli +++ 
b/infer/src/python/PyBuiltin.mli @@ -30,6 +30,8 @@ type binary_op = | Xor [@@deriving compare] +type collection = List | Set | Tuple | Map [@@deriving compare] + type textual = | IsTrue | Binary of binary_op @@ -41,9 +43,7 @@ type textual = | PythonCode | PythonIter | PythonIterNext - | PythonBuildList - | PythonBuildSet - | PythonBuildTuple + | PythonBuild of collection | PythonIndex | PythonSubscriptGet | PythonSubscriptSet diff --git a/infer/src/python/PyCommon.ml b/infer/src/python/PyCommon.ml index bda4826bf65..5c7c7a7a4ea 100644 --- a/infer/src/python/PyCommon.ml +++ b/infer/src/python/PyCommon.ml @@ -46,6 +46,8 @@ let pyClass = mk_type "PyClass" let pyList = mk_type "PyList" +let pyMap = mk_type "PyMap" + let pySet = mk_type "PySet" let pyTuple = mk_type "PyTuple" @@ -114,6 +116,14 @@ let mk_string (s : string) = T.Exp.Call {proc; args; kind= NonVirtual} +let get_string = function + | T.Exp.Call {proc; args= [arg]; kind= NonVirtual} + when T.equal_qualified_procname proc python_string -> ( + match arg with Const (Str s) -> Some s | _ -> None ) + | _ -> + None + + let mk_bytes (s : bytes) = let proc = python_bytes in let s = Bytes.to_string s in diff --git a/infer/src/python/PyCommon.mli b/infer/src/python/PyCommon.mli index 672f8dab50b..d49b68ab448 100644 --- a/infer/src/python/PyCommon.mli +++ b/infer/src/python/PyCommon.mli @@ -86,6 +86,9 @@ val pyNone : Textual.Typ.t val pyList : Textual.Typ.t (** Python's builtin [list] type *) +val pyMap : Textual.Typ.t +(** Python's builtin [dict] type, as built by [BUILD_MAP] *) + val pySet : Textual.Typ.t (** Python's builtin [set] type *) @@ -101,6 +104,9 @@ val mk_float : float -> Textual.Exp.t val mk_string : string -> Textual.Exp.t (** Helper function to define typed Textual expression for literal strings. *) +val get_string : Textual.Exp.t -> string option +(** Helper to get back a string built with [mk_string] *) + val mk_bytes : bytes -> Textual.Exp.t (** Helper function to define typed Textual expression for literal bytes. 
*) diff --git a/infer/src/python/PyEnv.ml b/infer/src/python/PyEnv.ml index 4759ec1a862..a7f25452364 100644 --- a/infer/src/python/PyEnv.ml +++ b/infer/src/python/PyEnv.ml @@ -81,7 +81,7 @@ module DataStack = struct | VarName of int | Temp of T.Ident.t | Code of {fun_or_class: bool; code_name: string; code: FFI.Code.t} - | Map of (string * cell) list + | Map of (T.Exp.t * cell) list | BuiltinBuildClass | Import of {import_path: Ident.t; symbols: string list} | ImportCall of {id: Ident.t; loc: T.Location.t} diff --git a/infer/src/python/PyEnv.mli b/infer/src/python/PyEnv.mli index 83cf2619b6f..f1c37b34021 100644 --- a/infer/src/python/PyEnv.mli +++ b/infer/src/python/PyEnv.mli @@ -61,7 +61,7 @@ module DataStack : sig | Temp of T.Ident.t (** SSA variable *) | Code of {fun_or_class: bool; code_name: string; code: FFI.Code.t} (** [code] Python object with its name. It can be a function, class, closure, ... *) - | Map of (string * cell) list + | Map of (T.Exp.t * cell) list (** Light encoding of raw Python tuples/dicts. Only used for type annotations at the moment. 
*) | BuiltinBuildClass (** see Python's [LOAD_BUILD_CLASS] *) | Import of {import_path: Ident.t; symbols: string list} diff --git a/infer/src/python/PyTrans.ml b/infer/src/python/PyTrans.ml index a428f57f0f5..8339babe048 100644 --- a/infer/src/python/PyTrans.ml +++ b/infer/src/python/PyTrans.ml @@ -21,7 +21,6 @@ module SMap = PyCommon.SMap module Error = struct type todo = | UnsupportedOpcode of string - | LoadMapCells | FunctionFlags of string * MakeFunctionFlags.t | StaticCallImport of Ident.t | SuperNotInFile of Ident.t * Ident.t @@ -36,12 +35,11 @@ module Error = struct | CallKwInvalidFunction of DataStack.cell | RaiseException of int | RaiseExceptionSource of DataStack.cell + | MakeFunctionSignature of DataStack.cell let pp_todo fmt = function | UnsupportedOpcode opname -> F.fprintf fmt "Unsupported opcode: %s" opname - | LoadMapCells -> - F.pp_print_string fmt "load DataStack.Map" | FunctionFlags (opname, flags) -> F.fprintf fmt "[%s] support for flags %a is not implemented" opname MakeFunctionFlags.pp flags @@ -75,6 +73,8 @@ module Error = struct F.fprintf fmt "Unsupported RAISE_VARARGS mode %d" n | RaiseExceptionSource cell -> F.fprintf fmt "Unsupported source of exception: %a" DataStack.pp_cell cell + | MakeFunctionSignature cell -> + F.fprintf fmt "Unsupported type annotation: %a" DataStack.pp_cell cell type kind = @@ -98,6 +98,7 @@ module Error = struct | MakeFunctionInvalidCode of DataStack.cell | MakeFunctionInvalidName of DataStack.cell | MakeFunctionInvalidQname of FFI.Constant.t + | MakeFunctionInvalidAnnotations of T.Exp.t | SuperNotInClass of Ident.t | SuperNoParent of Ident.t | SuperInStatic of Ident.t @@ -167,6 +168,8 @@ module Error = struct F.fprintf fmt "invalid function name: %a" DataStack.pp_cell cell | MakeFunctionInvalidQname const -> F.fprintf fmt "invalid function qualified name: %a" FFI.Constant.pp const + | MakeFunctionInvalidAnnotations exp -> + F.fprintf fmt "invalid annotation. 
Expecting literal string but got %a" T.Exp.pp exp | SuperNotInClass id -> F.fprintf fmt "Call to super() in %a which is not a class" Ident.pp id | SuperNoParent id -> @@ -417,7 +420,7 @@ let is_imported env path = (** Try to load the data referenced by a [DataStack.cell], into a [Textual.Exp.t] *) -let load_cell env {FFI.Code.co_consts; co_names; co_varnames} cell = +let rec load_cell env ({FFI.Code.co_consts; co_names; co_varnames} as code) cell = let open IResult.Let_syntax in let default_info typ = Env.Info.default typ in let default = default_info PyCommon.pyObject in @@ -448,8 +451,15 @@ let load_cell env {FFI.Code.co_consts; co_names; co_varnames} cell = let kind = if fun_or_class then Env.Info.Code else Env.Info.Class in let info = {Env.Info.typ; kind} in Ok (env, exp, info) - | Map _map -> - Error (L.InternalError, Error.TODO LoadMapCells) + | Map map -> + let* env, items = + List.fold_result ~init:(env, []) (List.rev map) ~f:(fun (env, acc) (key, value) -> + let* env, value, _typ = load_cell env code value in + Ok (env, key :: value :: acc) ) + in + let env, id, typ = Env.mk_builtin_call env (Builtin.PythonBuild Map) items in + let env = Env.push env (DataStack.Temp id) in + Ok (env, T.Exp.Var id, default_info typ) | Path id -> if (* TODO: this is incomplete. 
If something is imported, we can' really know if it is a @@ -840,32 +850,32 @@ module FUNCTION = struct module MAKE = struct let unpack_annotations env {FFI.Code.co_names; co_consts} annotations = - let unpack = - let loc = Env.loc env in - List.fold_left ~init:(Some []) - ~f:(fun acc (name, c) -> - match (acc, c) with - | None, _ -> - None - | Some acc, DataStack.Name {ndx} -> - let annotation = Ident.mk ~loc @@ co_names.(ndx) in - let typed_name = {PyCommon.name; annotation} in - let typed_name = - Option.value ~default:typed_name (lift_annotation env typed_name) - in - Some (typed_name :: acc) - | Some acc, DataStack.Const ndx -> ( - match co_consts.(ndx) with - | FFI.Constant.PYCNone -> - Some ({PyCommon.name; annotation= Ident.mk "None"} :: acc) - | _ -> - Some acc ) - | Some acc, c -> - Debug.p "[unpack_annotations] unsupported cell %a\n" DataStack.pp_cell c ; - Some acc ) - annotations - in - Option.value unpack ~default:[] + let open IResult.Let_syntax in + let loc = Env.loc env in + List.fold_result ~init:[] + ~f:(fun acc (name, c) -> + let* name = + match PyCommon.get_string name with + | Some name -> + Ok name + | _ -> + Error (L.ExternalError, Error.MakeFunctionInvalidAnnotations name) + in + match c with + | DataStack.Name {ndx} -> + let annotation = Ident.mk ~loc @@ co_names.(ndx) in + let typed_name = {PyCommon.name; annotation} in + let typed_name = Option.value ~default:typed_name (lift_annotation env typed_name) in + Ok (typed_name :: acc) + | DataStack.Const ndx -> ( + match co_consts.(ndx) with + | FFI.Constant.PYCNone -> + Ok ({PyCommon.name; annotation= Ident.mk "None"} :: acc) + | _ -> + Ok acc ) + | _ -> + Error (L.InternalError, Error.TODO (MakeFunctionSignature c)) ) + annotations (** {v MAKE_FUNCTION(flags) v} @@ -899,7 +909,9 @@ module FUNCTION = struct Ok (env, annotations) ) else Ok (env, None) in - let annotations = Option.value_map ~default:[] ~f:(unpack_annotations env code) annotations in + let* annotations = + Option.value_map 
~default:(Ok []) ~f:(unpack_annotations env code) annotations + in let* code = match DataStack.as_code code body with | None -> @@ -1470,7 +1482,12 @@ module BUILD = struct match c with | FFI.Constant.PYCTuple keys -> Array.fold_result keys ~init:[] ~f:(fun keys c -> - match as_key c with Some key -> Ok (key :: keys) | None -> Error () ) + match as_key c with + | Some key -> + let key = PyCommon.mk_string key in + Ok (key :: keys) + | None -> + Error () ) | _ -> Error () ) | _ -> @@ -1482,9 +1499,9 @@ module BUILD = struct The version of [BUILD_MAP] specialized for constant keys. Pops the top element on the stack which contains a tuple of keys, then starting from [top-of-stack+1], pops [count] values to form values in the built dictionary, which is pushed back on the stack. *) - let run env code {FFI.Instruction.opname; arg} = + let run env code {FFI.Instruction.opname; arg= count} = let open IResult.Let_syntax in - Debug.p "[%s] count = %d\n" opname arg ; + Debug.p "[%s] count = %d\n" opname count ; let* env, cell = pop_datastack opname env in let* keys = match is_tuple_ids code cell with @@ -1494,7 +1511,7 @@ module BUILD = struct Error (L.UserError, Error.LiteralTuple opname) in (* TODO check cells is a tuple of literal strings *) - let* env, values = pop_n_datastack opname env arg in + let* env, values = pop_n_datastack opname env count in Debug.p " #values = %d\n" (List.length values) ; let* map = match List.zip keys values with @@ -1507,47 +1524,49 @@ module BUILD = struct Ok (env, None) end - module LIST = struct - (** {v BUILD_LIST(count) v} - - Creates a list consuming count items from the stack, and pushes the resulting list onto the - stack. 
*) - let run env code {FFI.Instruction.opname; arg= count} = + module MAP = struct + let build_map env code cells = let open IResult.Let_syntax in - Debug.p "[%s] count = %d\n" opname count ; - let* env, items = pop_n_datastack opname env count in - let* env, items = cells_to_textual env code items in - let env, id, _typ = Env.mk_builtin_call env Builtin.PythonBuildList items in - let env = Env.push env (DataStack.Temp id) in - Ok (env, None) - end + let rec aux env = function + | [] -> + Ok [] + | key :: value :: rest -> + let* env, key, _typ = load_cell env code key in + let* tl = aux env rest in + Ok ((key, value) :: tl) + | _ -> + (* We popped 2 * count number of cells, this should be unreachable *) + L.die InternalError "BUILD_MAP wrong number of cells" + in + aux env cells + - module SET = struct - (** {v BUILD_SET(count) v} + (** {v BUILD_MAP(count) v} - Creates a set consuming count items from the stack, and pushes the resulting set onto the - stack. *) + Pushes a new dictionary object onto the stack. Pops [2 * count] items so that the dictionary + holds count entries: [{..., TOS3: TOS2, TOS1: TOS}] *) let run env code {FFI.Instruction.opname; arg= count} = let open IResult.Let_syntax in Debug.p "[%s] count = %d\n" opname count ; - let* env, items = pop_n_datastack opname env count in - let* env, items = cells_to_textual env code items in - let env, id, _typ = Env.mk_builtin_call env Builtin.PythonBuildSet items in - let env = Env.push env (DataStack.Temp id) in + let* env, items = pop_n_datastack opname env (2 * count) in + let* map = build_map env code items in + let env = Env.push env (DataStack.Map map) in Ok (env, None) end - module TUPLE = struct - (** {v BUILD_TUPLE(count) v} + module COLLECTION = struct + (** {v BUILD_LIST(count) v} + {v BUILD_SET(count) v} + {v BUILD_TUPLE(count) v} - Creates a tuple consuming count items from the stack, and pushes the resulting tuple onto - the stack. 
*) - let run env code {FFI.Instruction.opname; arg= count} = + Creates a collection consuming count items from the stack, and pushes the resulting + collection onto the stack. *) + let run env code {FFI.Instruction.opname; arg= count} collection = let open IResult.Let_syntax in Debug.p "[%s] count = %d\n" opname count ; let* env, items = pop_n_datastack opname env count in let* env, items = cells_to_textual env code items in - let env, id, _typ = Env.mk_builtin_call env Builtin.PythonBuildTuple items in + let env, id, _typ = Env.mk_builtin_call env (Builtin.PythonBuild collection) items in let env = Env.push env (DataStack.Temp id) in Ok (env, None) end @@ -2433,11 +2452,13 @@ let run_instruction env code ({FFI.Instruction.opname; starts_line} as instr) ne | "JUMP_IF_FALSE_OR_POP" -> JUMP.IF_OR_POP.run ~jump_if:false env code instr next_offset_opt | "BUILD_LIST" -> - BUILD.LIST.run env code instr + BUILD.COLLECTION.run env code instr Builtin.List + | "BUILD_MAP" -> + BUILD.MAP.run env code instr | "BUILD_SET" -> - BUILD.SET.run env code instr + BUILD.COLLECTION.run env code instr Builtin.Set | "BUILD_TUPLE" -> - BUILD.TUPLE.run env code instr + BUILD.COLLECTION.run env code instr Builtin.Tuple | "STORE_SUBSCR" -> STORE.SUBSCR.run env code instr | "BINARY_SUBSCR" -> diff --git a/infer/src/python/unit/PyTransTest.ml b/infer/src/python/unit/PyTransTest.ml index f185933197d..cd155c2be9a 100644 --- a/infer/src/python/unit/PyTransTest.ml +++ b/infer/src/python/unit/PyTransTest.ml @@ -2883,6 +2883,65 @@ def build_list(): declare $builtins.python_float(float) : *PyFloat declare $builtins.python_int(int) : *PyInt |}] + + + let%expect_test _ = + let source = + {| +x = "1" +s = {x : 1, "2": 2} +print(s) + +s = {"a": 42, "b": 1664} +print(s["1"]) + |} + in + test source ; + [%expect + {| + .source_language = "python" + + define dummy.$toplevel() : *PyNone { + #b0: + store &dummy::x <- $builtins.python_string("1"):*PyString + n0 = $builtins.python_build_map(n0, 
$builtins.python_int(1), $builtins.python_string("2"), $builtins.python_int(2)) + store &dummy::s <- n0:*PyMap + n1:*PyMap = load &dummy::s + n2 = $builtins.print(n1) + n3 = $builtins.python_build_map($builtins.python_string("a"), $builtins.python_int(42), $builtins.python_string("b"), $builtins.python_int(1664)) + store &dummy::s <- n3:*PyMap + n4:*PyMap = load &dummy::s + n5 = $builtins.python_subscript_get(n4, $builtins.python_string("1")) + n6 = $builtins.print(n5) + ret null + + } + + global dummy::x: *PyObject + + global dummy::s: *PyObject + + global $python_implicit_names::__name__: *PyString + + global $python_implicit_names::__file__: *PyString + + declare $builtins.print(...) : *PyObject + + declare $builtins.python_subscript_get(*PyObject, *PyObject) : *PyObject + + declare $builtins.python_build_map(...) : *PyMap + + declare $builtins.python_tuple(...) : *PyObject + + declare $builtins.python_bytes(*Bytes) : *PyBytes + + declare $builtins.python_string(*String) : *PyString + + declare $builtins.python_bool(int) : *PyBool + + declare $builtins.python_float(float) : *PyFloat + + declare $builtins.python_int(int) : *PyInt |}] end )