Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Web UI #564

Merged
merged 5 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ jobs:
-DLLVM_DIR=${LLVM_PREFIX_DIR}/lib/cmake/llvm \
-DMLIR_DIR=${LLVM_PREFIX_DIR}/lib/cmake/mlir \
-DMX_USE_VENDORED_CLANG=OFF \
-DLLVM_ENABLE_LLD:BOOL=TRUE \
-GNinja \
./

Expand Down
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ RUN cmake \
-DCMAKE_C_COMPILER="$(which clang-18)" \
-DCMAKE_CXX_COMPILER="$(which clang++-18)" \
-DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \
-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=TRUE \
-DLLVM_ENABLE_LLD:BOOL=TRUE
-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=TRUE

RUN cmake --build '/work/build/multiplier' --target install
RUN chmod +x /work/install/bin/*
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ We like to say that with its APIs, *you can get everywhere from anywhere*.
* Writeups
* [regreSSHion OpenSSH variant analysis](docs/openssh-variant-analysis.md)
* [PHP variant analysis](docs/php-variant-analysis.md)
* Included tools
* Included Python tools
* [Web-based code browser for browsing code from a database](docs/web-browser.md)
* [Group functions by type](docs/group-functions.md)
* [Find cycles in Qt signals/slots connects](docs/qt-signals.py)
* Included C++ tools
* [Find function calls inside macro argument lists](docs/mx-find-calls-in-macro-expansions.md)
* [Find possible divergent representations](docs/mx-find-divergent-candidates.md)
* [Find uses of `copy_to_user` in the Linux kernel that overwrite flexible array members](docs/mx-find-flexible-user-copies.md)
Expand Down
251 changes: 251 additions & 0 deletions bin/Examples/GroupFunctions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# Copyright 2024, Peter Goodman. All rights reserved.

"""
This program approximately groups functions by their types, and then prints
out the grouped functions.

XREF: https://x.com/eatonphil/status/1821573274582823247
"""

import argparse
import collections
from typing import List, DefaultDict, Set, Optional, Iterable

import multiplier as mx


def sep(out: List[str], prev_out_len: int, tok: str):
    """Append the separator `tok` to `out` if `out` has changed length.

    `prev_out_len` is the length of `out` recorded before a sequence of items
    started being emitted. Nothing is appended while `out` still has that
    length, which keeps a separator from appearing before the first item.
    """
    if len(out) == prev_out_len:
        return
    out.append(tok)


def mangle(ty: mx.ast.Type, out: List[str],
           strip_trivial_qualifiers: bool = False,
           func: Optional[mx.ast.FunctionDecl] = None) -> None:
    """Append an approximate, structural mangling of `ty` to `out`.

    The fragments appended to `out` are meant to be joined into one string
    and used as a grouping key: types that mangle to the same string are
    treated as equivalent.

    Args:
        ty: The type to mangle.
        out: Accumulator of string fragments; mutated in place.
        strip_trivial_qualifiers: If true, qualifiers sitting directly on a
            qualified `ty` are dropped, unless the underlying type is a
            reference or a pointer to a non-function type.
        func: The declaration whose type is being mangled, if any. When it
            is a C++ method with a `this` type, that type is mangled ahead
            of the parameter types.

    Raises:
        Exception: If `ty` is not one of the handled kinds and its class
            name starts with `Dependent`.
    """

    # TODO(pag): Look into throwing, `const`-qualified methods, etc.
    if isinstance(ty, mx.ast.FunctionType):
        out.append("Func(")
        out_len = len(out)

        # Try to inject the `this` pointer type.
        if isinstance(func, mx.ast.CXXMethodDecl):
            if this_ty := func.this_type:
                mangle(this_ty, out, False)

        if isinstance(ty, mx.ast.FunctionProtoType):
            for pty in ty.parameter_types:
                sep(out, out_len, ", ")
                mangle(pty, out, True)
            if ty.is_variadic:
                sep(out, out_len, ", ")
                out.append("...")
        else:
            # No prototype: the parameter list is unknown, so render it the
            # same way as a variadic tail.
            sep(out, out_len, ", ")
            out.append("...")
        out.append("; ")
        mangle(ty.return_type, out, True)
        out.append(")")

    elif isinstance(ty, mx.ast.QualifiedType):
        bty = ty.unqualified_desugared_type

        # Try to double check if we think we should ignore trivial
        # qualifiers. The idea here is that if you have two functions, one
        # taking an `int` parameter, and the other taking a `const int`
        # parameter, then their prototypes are functionally identical. The
        # `const` qualifier really only impacts whether or not the bodies of
        # those functions can assign to those parameters.
        #
        # Similarly, we always want to strip off a `const` qualifier on
        # something like an `int` return type, because `const` on the return
        # type is mostly meaningless.
        if strip_trivial_qualifiers:
            if isinstance(bty, mx.ast.PointerType):
                if not isinstance(bty.pointee_type, mx.ast.FunctionType):
                    strip_trivial_qualifiers = False
            elif isinstance(bty, mx.ast.ReferenceType):
                strip_trivial_qualifiers = False

        if not strip_trivial_qualifiers:
            out_len = len(out)
            if ty.is_const_qualified:
                out.append("C")
            # "V" records volatility; it must not be keyed off `restrict`,
            # which has its own "R" marker below.
            if ty.is_volatile_qualified:
                out.append("V")
            if ty.is_restrict_qualified:
                out.append("R")
            if ty.has_address_space:
                out.append(f"-{ty.address_space.name}")
            sep(out, out_len, " ")
        mangle(bty, out, False)

    elif isinstance(ty, mx.ast.BuiltinType):
        out.append(str(ty.builtin_kind.name))

    elif isinstance(ty, mx.ast.TypedefType):
        mangle(ty.declaration.underlying_type, out, strip_trivial_qualifiers)

    elif isinstance(ty, mx.ast.UsingType):
        mangle(ty.underlying_type, out, strip_trivial_qualifiers)

    elif isinstance(ty, mx.ast.TagType):
        # Tag types are identified by the ID of their canonical declaration
        # rather than by name.
        decl = ty.declaration.canonical_declaration
        out.append(f"{decl.kind.name}({decl.id})")

    elif isinstance(ty, mx.ast.AtomicType):
        out.append("Atomic(")
        mangle(ty.value_type, out, False)
        out.append(")")

    # NOTE(pag): Ignores all the derived classes.
    elif isinstance(ty, mx.ast.VectorType):
        out.append("Vector(")
        mangle(ty.element_type, out, False)
        out.append(", ")
        out.append(ty.vector_kind.name)
        out.append(")")

    # NOTE(pag): Ignores all the derived classes.
    elif isinstance(ty, mx.ast.ArrayType):
        out.append("Array(")
        mangle(ty.element_type, out, False)
        out.append(", ")
        out.append(ty.size_modifier.name)
        out.append(")")

    elif isinstance(ty, mx.ast.TemplateSpecializationType):
        if aliased_ty := ty.aliased_type:
            mangle(aliased_ty, out, strip_trivial_qualifiers)
        else:
            out.append("?")

    elif isinstance(ty, (mx.ast.PointerType, mx.ast.ReferenceType)):
        # Drop the trailing "Type" from the class name, e.g. `PointerType`
        # becomes `Pointer`.
        name = ty.__class__.__name__[:-4]
        out.append(f"{name}(")
        mangle(ty.pointee_type, out, False)
        out.append(")")

    elif isinstance(ty, mx.ast.ParenType):
        mangle(ty.inner_type, out, strip_trivial_qualifiers)

    elif isinstance(ty, mx.ast.MacroQualifiedType):
        mangle(ty.underlying_type, out, strip_trivial_qualifiers)

    else:
        if ty.__class__.__name__.startswith("Dependent"):
            raise Exception("Ignoring dependent types")

        # TODO(pag): Add more.
        out.append(ty.kind.name)


def canon_type(ty: mx.ast.Type, out: List):
    """Append the kind of `ty` to `out` when `ty` is a function type.

    Any other type leaves `out` untouched.
    """
    if not isinstance(ty, mx.ast.FunctionType):
        return
    out.append(ty.kind)


def group_functions_by_type(index: mx.Index | str) -> List[List[int]]:
    """Group the functions of an index by their mangled type.

    Args:
        index: Either an already opened index, or the path to a database,
            which is then opened behind an in-memory cache.

    Returns:
        One list of canonical function declaration IDs per distinct mangled
        type. Functions whose type cannot be mangled are left out.
    """
    # Open the database named by the argument itself; this function must not
    # depend on the script's command-line globals.
    if isinstance(index, str):
        index = mx.Index.in_memory_cache(mx.Index.from_database(index))

    grouped_functions: DefaultDict[str, List[int]] = collections.defaultdict(
        list)
    seen: Set[int] = set()

    # Iterate over all functions in the project, and group them by a form of
    # their mangled type.
    for func in mx.ast.FunctionDecl.IN(index):
        func = func.canonical_declaration

        # Visit each canonical declaration only once.
        func_id = func.id
        if func_id in seen:
            continue

        seen.add(func_id)

        out: List[str] = []
        try:
            mangle(func.type, out, False, func)

        # Likely a dependent type, i.e. an unspecialized function template,
        # or a function inside of a template. We can't reliably compare
        # dependent function types. `Exception` (rather than a bare
        # `except`) keeps Ctrl-C and interpreter exit working.
        except Exception:
            continue

        grouped_functions["".join(out)].append(func_id)

    return list(grouped_functions.values())


if __name__ == "__main__":
    # The only command-line option is the path to the database.
    parser = argparse.ArgumentParser(description="Multiplier Code Browser")
    parser.add_argument(
        '--db',
        type=str,
        required=True,
        help="Path to mx-index-produced database")
    args = parser.parse_args()
    index = mx.Index.in_memory_cache(mx.Index.from_database(args.db))
    name_config = mx.ast.QualifiedNameRenderOptions(fully_qualified=True)
    flc = mx.frontend.FileLocationCache()

    # Dump out the grouped functions, skipping groups with only one function.
    groups = [funcs for funcs in group_functions_by_type(index)
              if len(funcs) != 1]

    for group_num, funcs in enumerate(groups):

        # Every group after the first is preceded by a separator.
        if group_num:
            print()
            print("///////////////////////////////////////////////////////////////")

        for func_num, func_id in enumerate(funcs):
            func = index.declaration(func_id)
            func_file_toks = func.tokens.file_tokens

            # Blank line between consecutive functions of the same group.
            if func_num:
                print()

            # Get the fully qualified name. This will let the human
            # distinguish methods.
            func_name = func.qualified_name(name_config).data

            # Print out the name and optionally the location of the function.
            # The function's name as printed may appear different than what
            # shows up in the code snippet below. This can happen if one
            # function has an `alias` attribute for another, or if the
            # definition of the function in the file is actually subject to
            # macro substitution.
            first_tok = func_file_toks.front
            header = f"// \t{func_name}"
            if file := mx.frontend.File.containing(first_tok):
                path = str(next(file.paths))
                header += f" @ {path}"
                if line_col := first_tok.location(flc):
                    header += f":{line_col[0]}:{line_col[1]}"
            print(header)

            # If we're dealing with the function definition, then try to chop
            # the printing off at the beginning of the function's body.
            # NOTE(review): a body index of 0 is treated the same as "not
            # found" by the truthiness check below -- confirm that is fine.
            limit = func_file_toks.size
            body = func.body
            if body:
                body_index = func_file_toks.index_of(
                    body.tokens.file_tokens.front)
                if body_index:
                    limit = body_index

            # Render out the first `limit` tokens (as they appear in the
            # file), doubly indented.
            pieces: List[str] = ["\t\t"]
            pieces.extend(
                tok.data for _, tok in zip(range(limit), func_file_toks))
            print("".join(pieces).replace("\n", "\n\t\t"))
Loading