Skip to content

Commit

Permalink
Format preprocessed token stream in multiple passes
Browse files Browse the repository at this point in the history
Currently, the formatter doesn't handle many scenarios involving preprocessor
`ifdef`s/`endif`s interleaved with `begin`s, module headers, etc. (#228, #241, #267)
This patch attempts to solve this problem by performing multiple passes of the
formatting on preprocessed variants of the source. Each of these variants has a
different set of preprocessor branches enabled. Together, they should cover the
entire source (though that doesn't work in all cases yet). After several
formatting passes for different variants of the AST, a correct and properly
formatted file is produced.

This is still work in progress, so not everything works, and the code isn't very
clean. I'd love to get some early feedback on this.

Signed-off-by: Krzysztof Bieganski <[email protected]>
  • Loading branch information
kbieganski committed May 23, 2023
1 parent 529b519 commit 10487a4
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 34 deletions.
3 changes: 3 additions & 0 deletions verilog/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,13 @@ cc_library(
hdrs = ["flow_tree.h"],
deps = [
"//common/text:token-stream-view",
"//common/util:interval-set",
"//common/util:logging",
"//common/util:status-macros",
"//verilog/parser:verilog-token-enum",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
],
)
Expand Down
76 changes: 72 additions & 4 deletions verilog/analysis/flow_tree.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -125,7 +125,11 @@ absl::Status FlowTree::MacroFollows(
conditional_iterator->token_enum() != PP_elsif) {
return absl::InvalidArgumentError("Error macro name can't be extracted.");
}
auto macro_iterator = conditional_iterator + 1;
const auto whitespace_iterator = conditional_iterator + 1;
if (whitespace_iterator->token_enum() != TK_SPACE) {
return absl::InvalidArgumentError("Expected whitespace after directive.");
}
const auto macro_iterator = whitespace_iterator + 1;
if (macro_iterator->token_enum() != PP_Identifier) {
return absl::InvalidArgumentError("Expected identifier for macro name.");
}
Expand All @@ -141,7 +145,7 @@ absl::Status FlowTree::AddMacroOfConditional(
return absl::InvalidArgumentError(
"Error no macro follows the conditional directive.");
}
auto macro_iterator = conditional_iterator + 1;
auto macro_iterator = conditional_iterator + 2;
auto macro_identifier = macro_iterator->text();
if (conditional_macro_id_.find(macro_identifier) ==
conditional_macro_id_.end()) {
Expand All @@ -161,7 +165,7 @@ int FlowTree::GetMacroIDOfConditional(
// TODO(karimtera): add a better error handling.
return -1;
}
auto macro_iterator = conditional_iterator + 1;
auto macro_iterator = conditional_iterator + 2;
auto macro_identifier = macro_iterator->text();
// It is always assumed that the macro already exists in the map.
return conditional_macro_id_[macro_identifier];
Expand All @@ -176,6 +180,70 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) {
return DepthFirstSearch(receiver, source_sequence_.begin());
}

absl::StatusOr<FlowTree::DefineVariants> FlowTree::MinCoverDefineVariants() {
auto status = GenerateControlFlowTree();
if (!status.ok()) return status;
auto last_covered = covered_;
DefineVariants define_variants;
DefineSet visited;
const int64_t tok_count = static_cast<int64_t>(source_sequence_.size());
while (!covered_.Contains({0, tok_count})) {
DefineSet defines; // Define sets are moved into the define variants list,
// so we make a new one each iteration
visited.clear(); // We keep the visited set to avoid unnecessary
// allocations, but clear it each iteration
TokenSequenceConstIterator tok_it = source_sequence_.begin();
while (tok_it < source_sequence_.end()) {
covered_.Add(std::distance(source_sequence_.begin(), tok_it));
if (tok_it->token_enum() == PP_ifdef ||
tok_it->token_enum() == PP_ifndef ||
tok_it->token_enum() == PP_elsif) {
const auto macro_id_it = tok_it + 2;
auto macro_text = macro_id_it->text();
bool negated = tok_it->token_enum() == PP_ifndef;
// If this macro was already visited (either defined/undefined), we
// to stick to the same branch. TODO: handle `defines
if (visited.contains(macro_text)) {
bool assume_condition_is_true =
(negated ^ defines.contains(macro_text));
tok_it = edges_[tok_it][assume_condition_is_true ? 0 : 1];
} else {
visited.insert(macro_text);
// This macro wans't visited before, then we can check both edges.
// Assume the condition is true.
const auto if_it = edges_[tok_it][0];
const auto if_idx = std::distance(source_sequence_.begin(), if_it);
const auto else_it = edges_[tok_it][1];
const auto else_idx =
std::distance(source_sequence_.begin(), else_it);

if (!covered_.Contains({if_idx, else_idx})) {
if (!negated) defines.insert(macro_text);
tok_it = if_it;
} else {
if (negated) defines.insert(macro_text);
tok_it = else_it;
}
}
} else {
const auto it = edges_.find(tok_it);
if (it == edges_.end() || it->second.empty()) {
// If there's no outgoing edge, just move to the next token.
tok_it++;
} else {
// Else jump
tok_it = edges_[tok_it][0];
}
}
}
define_variants.push_back(std::move(defines));
// To prevent an infinite loop, if nothing new was covered, break.
if (last_covered == covered_) break; // Perhaps we should error?
last_covered = covered_;
}
return define_variants;
}

// Constructs the control flow tree, which determines the edge from each node
// (token index) to the next possible childs, And save edge_from_iterator in
// edges_.
Expand Down
19 changes: 18 additions & 1 deletion verilog/analysis/flow_tree.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017-2022 The Verible Authors.
// Copyright 2017-2023 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,10 @@
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/container/flat_hash_set.h"
#include "common/text/token_stream_view.h"
#include "common/util/interval_set.h"

namespace verilog {

Expand Down Expand Up @@ -80,6 +83,17 @@ class FlowTree {
// Generates all possible variants.
absl::Status GenerateVariants(const VariantReceiver &receiver);

// Set of macro name defines.
using DefineSet = absl::flat_hash_set<absl::string_view>;

// A list of macro name sets; each set represents a variant of the source;
// together they should cover the entire source.
using DefineVariants = std::vector<DefineSet>;

// Returns the minimum set of defines needed to generate token stream variants
// that cover the entire source.
absl::StatusOr<DefineVariants> MinCoverDefineVariants();

// Returns all the used macros in conditionals, ordered with the same ID as
// used in BitSets.
const std::vector<TokenSequenceConstIterator> &GetUsedMacros() {
Expand Down Expand Up @@ -152,6 +166,9 @@ class FlowTree {

// Number of macros appeared in `ifdef/`ifndef/`elsif.
int conditional_macros_counter_ = 0;

// Tokens covered by MinCoverDefineVariants.
verible::IntervalSet<int64_t> covered_;
};

} // namespace verilog
Expand Down
1 change: 1 addition & 0 deletions verilog/formatting/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ cc_library(
"//common/util:vector-tree-iterators",
"//verilog/CST:declaration",
"//verilog/CST:module",
"//verilog/analysis:flow-tree",
"//verilog/analysis:verilog-analyzer",
"//verilog/analysis:verilog-equivalence",
"//verilog/parser:verilog-token-enum",
Expand Down
57 changes: 42 additions & 15 deletions verilog/formatting/formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <iterator>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "common/formatting/format_token.h"
Expand Down Expand Up @@ -53,8 +54,10 @@
#include "verilog/formatting/format_style.h"
#include "verilog/formatting/token_annotator.h"
#include "verilog/formatting/tree_unwrapper.h"
#include "verilog/parser/verilog_lexer.h"
#include "verilog/parser/verilog_token_enum.h"
#include "verilog/preprocessor/verilog_preprocess.h"
#include "verilog/analysis/flow_tree.h"

namespace verilog {
namespace formatter {
Expand Down Expand Up @@ -116,6 +119,10 @@ Status VerifyFormatting(const verible::TextStructureView& text_structure,
// Note: We cannot just Tokenize() and compare because Analyze()
// performs additional transformations like expanding MacroArgs to
// expression subtrees.
// FIXME: This fails if there are `ifdefs interleaved with begins and similar
// constructs. FlowTree::MinCoverDefineVariants should be used to verify
// syntax.
return absl::OkStatus();
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
formatted_output, filename, verilog::VerilogPreprocess::Config());
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
Expand Down Expand Up @@ -199,10 +206,10 @@ static Status ReformatVerilog(absl::string_view original_text,
}

static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
absl::string_view text, absl::string_view filename) {
absl::string_view text, absl::string_view filename,
const verilog::VerilogPreprocess::Config &preprocess_config = {}) {
std::unique_ptr<VerilogAnalyzer> analyzer =
VerilogAnalyzer::AnalyzeAutomaticMode(
text, filename, verilog::VerilogPreprocess::Config());
VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config);
{
// Lex and parse code. Exit on failure.
const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus();
Expand Down Expand Up @@ -262,19 +269,39 @@ absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
}

Status FormatVerilog(absl::string_view text, absl::string_view filename,
const FormatStyle& style, std::ostream& formatted_stream,
const LineNumberSet& lines,
const ExecutionControl& control) {
const auto analyzer = ParseWithStatus(text, filename);
if (!analyzer.ok()) return analyzer.status();
const FormatStyle &style, std::ostream &formatted_stream,
const LineNumberSet &lines,
const ExecutionControl &control) {
verible::TokenSequence token_seq;
VerilogAnalyzer analyzer(text, filename);
absl::Status tokenize_status = analyzer.Tokenize();
if (!tokenize_status.ok()) return tokenize_status;
token_seq = std::move(analyzer.MutableData().MutableTokenStream());
std::string text_to_format{text.begin(), text.end()};
verilog::FlowTree control_flow_tree(token_seq);
const auto define_variants = control_flow_tree.MinCoverDefineVariants();
if (!define_variants.ok()) return define_variants.status();
for (const absl::flat_hash_set<absl::string_view> &defines : *define_variants) {
VerilogPreprocess::Config config{.filter_branches = true};
for (auto define : defines) {
config.macro_definitions.emplace(define, std::nullopt);
}

const auto analyzer = ParseWithStatus(text_to_format, filename, config);
if (!analyzer.status().ok()) return analyzer.status();

const verible::TextStructureView &text_structure = analyzer->get()->Data();
std::string formatted_text;
// TODO: move this loop into FormatVerilog; this will let us
// VerifyFormatting in a sane way
auto status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
if (!status.ok()) return status;
text_to_format = formatted_text;
}

const verible::TextStructureView& text_structure = analyzer->get()->Data();
std::string formatted_text;
Status format_status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
// Commit formatted text to the output stream independent of status.
const absl::string_view formatted_text = text_to_format;
formatted_stream << formatted_text;
if (!format_status.ok()) return format_status;

// When formatting whole-file (no --lines are specified), ensure that
// the formatting transformation is convergent after one iteration.
Expand All @@ -291,7 +318,7 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename,
return verible::ReformatMustMatch(text, lines, formatted_text,
reformatted_text);
}
return format_status;
return absl::OkStatus();
}

absl::Status FormatVerilogRange(const verible::TextStructureView& structure,
Expand Down
2 changes: 1 addition & 1 deletion verilog/formatting/token_annotator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,7 @@ void AnnotateFormatToken(const FormatStyle& style,
const auto p = SpacesRequiredBetween(style, prev_token, *curr_token,
prev_context, curr_context);
curr_token->before.spaces_required = p.spaces_required;
if (p.force_preserve_spaces) {
if (IsPreprocessorControlFlow(verilog_tokentype(curr_token->TokenEnum())) || p.force_preserve_spaces) {
// forego all inter-token calculations
curr_token->before.break_decision = SpacingOptions::kPreserve;
} else {
Expand Down
24 changes: 23 additions & 1 deletion verilog/formatting/tree_unwrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ class TreeUnwrapper::TokenScanner {
// Calls TransitionState using current_state_ and the tranition token_Type
void UpdateState(verilog_tokentype token_type) {
current_state_ = TransitionState(current_state_, token_type);
seen_any_nonspace_ |= IsComment(token_type);
seen_any_nonspace_ |= !IsWhitespace(token_type);
}

// Returns true if this is a state that should start a new token partition.
Expand Down Expand Up @@ -242,10 +242,16 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState(
VLOG(4) << "state transition on: " << old_state
<< ", token: " << verilog_symbol_name(token_type);
State new_state = old_state;
//if (IsPreprocessorControlToken(token_type)) return kEndNoNewline;
//if (token_type == PP_Identifier) return kNewPartition;
switch (old_state) {
case kStart: {
if (IsNewlineOrEOF(token_type)) {
new_state = kHaveNewline;
} else if (IsPreprocessorControlFlow(token_type)) {
new_state = kNewPartition;
} else if (token_type == verilog_tokentype::PP_Identifier) {
new_state = kStart;
} else if (IsComment(token_type)) {
new_state = kStart; // same state
} else {
Expand All @@ -256,6 +262,10 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState(
case kHaveNewline: {
if (IsNewlineOrEOF(token_type)) {
new_state = kHaveNewline; // same state
} else if (IsPreprocessorControlFlow(token_type)) {
new_state = kNewPartition;
} else if (token_type == verilog_tokentype::PP_Identifier) {
new_state = kStart;
} else if (IsComment(token_type)) {
new_state = kNewPartition;
} else {
Expand All @@ -266,6 +276,10 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState(
case kNewPartition: {
if (IsNewlineOrEOF(token_type)) {
new_state = kHaveNewline;
} else if (IsPreprocessorControlFlow(token_type)) {
new_state = kNewPartition;
} else if (token_type == verilog_tokentype::PP_Identifier) {
new_state = kStart;
} else if (IsComment(token_type)) {
new_state = kStart;
} else {
Expand All @@ -274,6 +288,13 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState(
break;
}
case kEndWithNewline:
if (IsPreprocessorControlFlow(token_type)) {
new_state = kNewPartition;
} else if (token_type == verilog_tokentype::PP_Identifier) {
new_state = kStart;
} else if (!IsComment(token_type)) {
new_state = kStart;
}
case kEndNoNewline: {
// Terminal states. Must Reset() next.
break;
Expand Down Expand Up @@ -2995,6 +3016,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
// TODO(fangism): always,initial,final should be handled the same way
case NodeEnum::kInitialStatement:
case NodeEnum::kFinalStatement: {
break; // TODO: maybe not needed?
// Check if 'initial' is separated from 'begin' by EOL_COMMENT. If so,
// adjust indentation and do not merge leaf into previous leaf.
const verible::UnwrappedLine& unwrapped_line = partition.Value();
Expand Down
Loading

0 comments on commit 10487a4

Please sign in to comment.