Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved unicode support in mutator, flattener, and more #2662

Open
wants to merge 17 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PY_MODULE := slither
TEST_MODULE := tests

ALL_PY_SRCS := $(shell find $(PY_MODULE) -name '*.py') \
$(shell find test -name '*.py')
$(shell find tests -name '*.py')

# Optionally overridden by the user, if they're using a virtual environment manager.
VENV ?= env
Expand Down Expand Up @@ -85,4 +85,4 @@ package: $(VENV)/pyvenv.cfg

.PHONY: edit
edit:
$(EDITOR) $(ALL_PY_SRCS)
$(EDITOR) $(ALL_PY_SRCS)
8 changes: 4 additions & 4 deletions slither/analyses/evm/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,12 @@ def _get_evm_instructions_node(node_info):
contract_file = (
node_info["slither"]
.source_code[node_info["contract"].source_mapping.filename.absolute]
.encode("utf-8")
.encode("utf8")
)

# Get evm instructions corresponding to node's source line number
node_source_line = (
contract_file[0 : node_info["node"].source_mapping.start].count("\n".encode("utf-8")) + 1
contract_file[0 : node_info["node"].source_mapping.start].count("\n".encode("utf8")) + 1
)
node_pcs = contract_pcs.get(node_source_line, [])
node_ins = []
Expand Down Expand Up @@ -169,7 +169,7 @@ def generate_source_to_evm_ins_mapping(evm_instructions, srcmap_runtime, slither
"""

source_to_evm_mapping = {}
file_source = slither.source_code[filename].encode("utf-8")
file_source = slither.source_code[filename].encode("utf8")
prev_mapping = []

for idx, mapping in enumerate(srcmap_runtime):
Expand All @@ -193,7 +193,7 @@ def generate_source_to_evm_ins_mapping(evm_instructions, srcmap_runtime, slither
# See https://github.com/ethereum/solidity/issues/6119#issuecomment-467797635
continue

line_number = file_source[0 : int(offset)].count("\n".encode("utf-8")) + 1
line_number = file_source[0 : int(offset)].count("\n".encode("utf8")) + 1

# Append evm instructions to the corresponding source line number
# Note: Some evm instructions in mapping are not necessarily in program execution order
Expand Down
11 changes: 9 additions & 2 deletions slither/core/source_mapping/source_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,20 @@ def content(self) -> str:
"""
Return the txt content of the Source

Returns:
Use this property instead of slicing the source directly, e.g. source_code[start:end].
Direct slicing returns incorrect content if source_code contains any multi-byte
(non-ASCII) characters, because self.start and self.end are byte offsets, not char offsets.

Returns: str
"""
# If the compilation unit was not initialized, it means that the set_offset was never called
# on the corresponding object, which should not happen
assert self.compilation_unit
return self.compilation_unit.core.source_code[self.filename.absolute][self.start : self.end]
return (
self.compilation_unit.core.source_code[self.filename.absolute]
.encode("utf8")[self.start : self.end]
.decode("utf8")
)

@property
def content_hash(self) -> str:
Expand Down
6 changes: 3 additions & 3 deletions slither/detectors/source/rtlo.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ class RightToLeftOverride(AbstractDetector):

WIKI_RECOMMENDATION = "Special control characters must not be allowed."

RTLO_CHARACTER_ENCODED = "\u202e".encode("utf-8")
RTLO_CHARACTER_ENCODED = "\u202e".encode("utf8")
STANDARD_JSON = False

def _detect(self) -> List[Output]:
results = []
pattern = re.compile(".*\u202e.*".encode("utf-8"))
pattern = re.compile(".*\u202e.*".encode("utf8"))

for filename, source in self.slither.source_code.items():
# Attempt to find all RTLO characters in this source file.
original_source_encoded = source.encode("utf-8")
original_source_encoded = source.encode("utf8")
start_index = 0

# Keep searching all file contents for the character.
Expand Down
2 changes: 1 addition & 1 deletion slither/formatters/attributes/const_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _patch(
in_file_str = compilation_unit.core.source_code[in_file].encode("utf8")
old_str_of_interest = in_file_str[modify_loc_start:modify_loc_end]
# Find the keywords view|pure|constant and remove them
m = re.search("(view|pure|constant)", old_str_of_interest.decode("utf-8"))
m = re.search("(view|pure|constant)", old_str_of_interest.decode("utf8"))
if m:
create_patch(
result,
Expand Down
6 changes: 3 additions & 3 deletions slither/formatters/functions/external_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,16 @@ def _patch(
old_str_of_interest = in_file_str[modify_loc_start:modify_loc_end]
# Search for 'public' keyword which is in-between the function name and modifier name (if present)
# regex: 'public' could have spaces around or be at the end of the line
m = re.search(r"((\spublic)\s+)|(\spublic)$|(\)public)$", old_str_of_interest.decode("utf-8"))
m = re.search(r"((\spublic)\s+)|(\spublic)$|(\)public)$", old_str_of_interest.decode("utf8"))
if m is None:
# No visibility specifier exists; public by default.
create_patch(
result,
in_file,
# start after the function definition's closing parenthesis
modify_loc_start + len(old_str_of_interest.decode("utf-8").split(")")[0]) + 1,
modify_loc_start + len(old_str_of_interest.decode("utf8").split(")")[0]) + 1,
# end is same as start because we insert the keyword `external` at that location
modify_loc_start + len(old_str_of_interest.decode("utf-8").split(")")[0]) + 1,
modify_loc_start + len(old_str_of_interest.decode("utf8").split(")")[0]) + 1,
"",
" external",
) # replace_text is `external`
Expand Down
5 changes: 4 additions & 1 deletion slither/formatters/naming_convention/naming_convention.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,10 @@ def _is_var_declaration(slither: SlitherCompilationUnit, filename: str, start: i
:return:
"""
v = "var "
return slither.core.source_code[filename][start : start + len(v)] == v
return (
slither.core.source_code[filename].encode("utf8")[start : start + len(v)].decode("utf8")
== v
)


def _explore_type( # pylint: disable=too-many-arguments,too-many-locals,too-many-branches
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _patch( # pylint: disable=too-many-arguments
old_str_of_interest = in_file_str[modify_loc_start:modify_loc_end]
# Add keyword `constant` before the variable name
(new_str_of_interest, num_repl) = re.subn(
match_text, replace_text, old_str_of_interest.decode("utf-8"), 1
match_text, replace_text, old_str_of_interest.decode("utf8"), 1
)
if num_repl != 0:
create_patch(
Expand Down
6 changes: 3 additions & 3 deletions slither/formatters/variables/unused_state_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ def _patch(
in_file_str = compilation_unit.core.source_code[in_file].encode("utf8")
old_str_of_interest = in_file_str[modify_loc_start:]
old_str = (
old_str_of_interest.decode("utf-8").partition(";")[0]
+ old_str_of_interest.decode("utf-8").partition(";")[1]
old_str_of_interest.decode("utf8").partition(";")[0]
+ old_str_of_interest.decode("utf8").partition(";")[1]
)

create_patch(
result,
in_file,
int(modify_loc_start),
# Remove the entire declaration until the semicolon
int(modify_loc_start + len(old_str_of_interest.decode("utf-8").partition(";")[0]) + 1),
int(modify_loc_start + len(old_str_of_interest.decode("utf8").partition(";")[0]) + 1),
old_str,
"",
)
2 changes: 1 addition & 1 deletion slither/slithir/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def _fits_under_byte(val: Union[int, str]) -> List[str]:
size = len(hex_val) // 2
return [f"bytes{size}"]
# val is a str
length = len(val.encode("utf-8"))
length = len(val.encode("utf8"))
return [f"bytes{f}" for f in range(length, 33)] + ["bytes"]


Expand Down
26 changes: 15 additions & 11 deletions slither/tools/documentation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,17 @@ def _handle_function(
):
return overwrite
prompt = "Create a natpsec documentation for this solidity code with only notice and dev.\n"
src_mapping = function.source_mapping
content = function.compilation_unit.core.source_code[src_mapping.filename.absolute]
start = src_mapping.start
end = src_mapping.start + src_mapping.length
prompt += content[start:end]

use_tab = _use_tab(content[start - 1])
if use_tab is None and src_mapping.starting_column > 1:
logger.info(f"Non standard space indentation found {content[start - 1:end]}")
srcmap = function.source_mapping
src = function.compilation_unit.core.source_code[srcmap.filename.absolute]
first_char_index = len(
src.encode("utf8")[: srcmap.start].decode("utf8")
) # convert byte offset to char offset
prev_char = src[first_char_index - 1]
prompt += srcmap.content

use_tab = _use_tab(prev_char)
if use_tab is None and srcmap.starting_column > 1:
logger.info(f"Non standard indentation found: '{prev_char}'")
if overwrite:
logger.info("Disable overwrite to avoid mistakes")
overwrite = False
Expand All @@ -189,7 +191,7 @@ def _handle_function(
if logging_file:
codex.log_codex(logging_file, "A: " + str(answer))

answer_processed = _handle_codex(answer, src_mapping.starting_column, use_tab, force)
answer_processed = _handle_codex(answer, srcmap.starting_column, use_tab, force)
if answer_processed:
break

Expand All @@ -201,7 +203,9 @@ def _handle_function(
if not answer_processed:
return overwrite

create_patch(all_patches, src_mapping.filename.absolute, start, start, "", answer_processed)
create_patch(
all_patches, srcmap.filename.absolute, srcmap.start, srcmap.start, "", answer_processed
)

return overwrite

Expand Down
51 changes: 36 additions & 15 deletions slither/tools/flattening/flattening.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def _get_source_code(
:return:
"""
src_mapping = contract.source_mapping
content = self._compilation_unit.core.source_code[src_mapping.filename.absolute]
start = src_mapping.start
end = src_mapping.start + src_mapping.length
src_bytes = self._compilation_unit.core.source_code[src_mapping.filename.absolute].encode(
"utf8"
)

to_patch = []
# interface must use external
Expand All @@ -124,7 +124,7 @@ def _get_source_code(
+ f.parameters_src().source_mapping.length
)
attributes_end = f.returns_src().source_mapping.start
attributes = content[attributes_start:attributes_end]
attributes = src_bytes[attributes_start:attributes_end].decode("utf8")
regex = re.search(r"((\sexternal)\s+)|(\sexternal)$|(\)external)$", attributes)
if regex:
to_patch.append(
Expand All @@ -140,7 +140,7 @@ def _get_source_code(
if var.location == "calldata":
calldata_start = var.source_mapping.start
calldata_end = calldata_start + var.source_mapping.length
calldata_idx = content[calldata_start:calldata_end].find(" calldata ")
calldata_idx = src_bytes[calldata_start:calldata_end].find(" calldata ")
to_patch.append(
Patch(
calldata_start + calldata_idx + 1,
Expand All @@ -158,7 +158,7 @@ def _get_source_code(
+ f.parameters_src().source_mapping["length"]
)
attributes_end = f.returns_src().source_mapping["start"]
attributes = content[attributes_start:attributes_end]
attributes = src_bytes[attributes_start:attributes_end].decode("utf8")
regex = (
re.search(r"((\sexternal)\s+)|(\sexternal)$|(\)external)$", attributes)
if visibility == "external"
Expand All @@ -183,7 +183,7 @@ def _get_source_code(
if variable.visibility == "private":
attributes_start = variable.source_mapping.start
attributes_end = attributes_start + variable.source_mapping.length
attributes = content[attributes_start:attributes_end]
attributes = src_bytes[attributes_start:attributes_end].decode("utf8")
regex = re.search(r" private ", attributes)
if regex:
to_patch.append(
Expand Down Expand Up @@ -211,26 +211,47 @@ def _get_source_code(

to_patch.sort(key=lambda x: x.index, reverse=True)

content = content[start:end]
content = src_mapping.content.encode("utf8")
start = src_mapping.start
for patch in to_patch:
patch_type = patch.patch_type
index = patch.index
index = index - start
if patch_type == "public_to_external":
content = content[:index] + "public" + content[index + len("external") :]
content = (
content[:index].decode("utf8")
+ "public"
+ content[index + len("external") :].decode("utf8")
)
elif patch_type == "external_to_internal":
content = content[:index] + "internal" + content[index + len("external") :]
content = (
content[:index].decode("utf8")
+ "internal"
+ content[index + len("external") :].decode("utf8")
)
elif patch_type == "public_to_internal":
content = content[:index] + "internal" + content[index + len("public") :]
content = (
content[:index].decode("utf8")
+ "internal"
+ content[index + len("public") :].decode("utf8")
)
elif patch_type == "private_to_internal":
content = content[:index] + "internal" + content[index + len("private") :]
content = (
content[:index].decode("utf8")
+ "internal"
+ content[index + len("private") :].decode("utf8")
)
elif patch_type == "calldata_to_memory":
content = content[:index] + "memory" + content[index + len("calldata") :]
content = (
content[:index].decode("utf8")
+ "memory"
+ content[index + len("calldata") :].decode("utf8")
)
else:
assert patch_type == "line_removal"
content = content[:index] + " // " + content[index:]
content = content[:index].decode("utf8") + " // " + content[index:].decode("utf8")

self._source_codes[contract] = content
self._source_codes[contract] = content.decode("utf8")

def _pragmas(self) -> str:
"""
Expand Down
15 changes: 11 additions & 4 deletions slither/tools/mutator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,10 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
# get all the contracts as a list from given codebase
sol_file_list: List[str] = get_sol_file_list(Path(args.codebase), paths_to_ignore_list)

logger.info(blue("Preparing to mutate files:\n- " + "\n- ".join(sol_file_list)))
if not contract_names:
logger.info(blue("Preparing to mutate files:\n- " + "\n- ".join(sol_file_list)))
else:
logger.info(blue("Preparing to mutate contracts:\n- " + "\n- ".join(contract_names)))

# folder where backup files and uncaught mutants are saved
if output_dir is None:
Expand Down Expand Up @@ -240,7 +243,8 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too

# perform mutations on {target_contract} in file {file_name}
# setup placeholder val to signal whether we need to skip if no target_contract is found
target_contract = "SLITHER_SKIP_MUTATIONS" if contract_names else ""
skip_flag = "SLITHER_SKIP_MUTATIONS"
target_contract = skip_flag if contract_names else ""
try:
# loop through all contracts in file_name
for compilation_unit_of_main_file in sl.compilation_units:
Expand All @@ -258,8 +262,7 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
)
continue

if target_contract == "SLITHER_SKIP_MUTATIONS":
logger.debug(f"Skipping mutations in {filename}")
if target_contract == skip_flag:
continue

# TODO: find a more specific way to omit interfaces
Expand Down Expand Up @@ -334,6 +337,10 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
# transfer and delete the backup files
transfer_and_delete(files_dict)

if target_contract == skip_flag:
logger.debug(f"No target contracts found in {filename}, skipping")
continue

# log results for this file
logger.info(blue(f"Done mutating {target_contract}."))
if total_mutant_counts[0] > 0:
Expand Down
8 changes: 5 additions & 3 deletions slither/tools/mutator/mutators/AOR.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,13 @@ def _mutate(self) -> Dict:
# Get the string
start = node.source_mapping.start
stop = start + node.source_mapping.length
old_str = self.in_file_str[start:stop]
old_str = node.source_mapping.content
line_no = node.source_mapping.lines
if not line_no[0] in self.dont_mutate_line:
# Replace the original operator with the mutant operator
new_str = f"{old_str.split(ir.type.value)[0]}{op.value}{old_str.split(ir.type.value)[1]}"
halves = old_str.split(ir.type.value)
if len(halves) != 2:
continue # skip if assembly
new_str = f"{halves[0]}{op.value}{halves[1]}"
create_patch_with_line(
result,
self.in_file,
Expand Down
2 changes: 1 addition & 1 deletion slither/tools/mutator/mutators/ASOR.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _mutate(self) -> Dict:
if op != ir.expression:
start = node.source_mapping.start
stop = start + node.source_mapping.length
old_str = self.in_file_str[start:stop]
old_str = node.source_mapping.content
line_no = node.source_mapping.lines
if not line_no[0] in self.dont_mutate_line:
# Replace the expression with true
Expand Down
Loading
Loading