From 30fbee878102b2e3c60ece38c0ee2b25039f03b5 Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Thu, 3 Oct 2024 07:47:17 -0700 Subject: [PATCH] Give newline token symbolic name; print raw tokens c-escaped. The newline token can mean `'\n'`, but also `'\r\n'`, so printing the token symbolic name as simply `<<\n>>` can be confusing in that context; instead, print it as symbolic name `<<newline>>`. Also: the actual raw text of the token is printed in a c-escaped way now, which makes it easier to human-read if special characters are involved (and possibly easier to process when grepping through results). --- common/text/BUILD | 1 + common/text/token_info.cc | 6 ++++-- verilog/parser/verilog.y | 6 +++--- verilog/tools/syntax/verilog_syntax_test.sh | 6 ++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/common/text/BUILD b/common/text/BUILD index d0b1814f4..475db5d5b 100644 --- a/common/text/BUILD +++ b/common/text/BUILD @@ -26,6 +26,7 @@ cc_library( "//common/util:iterator-range", "//common/util:logging", "//common/util:range", + "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", ], ) diff --git a/common/text/token_info.cc b/common/text/token_info.cc index c42189468..7e00a94d3 100644 --- a/common/text/token_info.cc +++ b/common/text/token_info.cc @@ -20,6 +20,7 @@ #include #include +#include "absl/strings/escaping.h" #include "absl/strings/string_view.h" #include "common/strings/rebase.h" #include "common/text/constants.h" @@ -53,14 +54,15 @@ std::ostream &TokenInfo::ToStream(std::ostream &output_stream, output_stream << "(#"; context.token_enum_translator(output_stream, token_enum_); output_stream << " @" << left(context.base) << '-' << right(context.base) - << ": \"" << text_ << "\")"; + << ": \"" << absl::CEscape(text_) << "\")"; const auto dist = std::distance(context.base.end(), text_.end()); CHECK(IsSubRange(text_, context.base)) << "text.end() is off by " << dist; return output_stream; } std::ostream 
&TokenInfo::ToStream(std::ostream &output_stream) const { - return output_stream << "(#" << token_enum_ << ": \"" << text_ << "\")"; + return output_stream << "(#" << token_enum_ << ": \"" << absl::CEscape(text_) + << "\")"; } std::string TokenInfo::ToString(const Context &context) const { diff --git a/verilog/parser/verilog.y b/verilog/parser/verilog.y index a0ae63d82..2c04b85e5 100644 --- a/verilog/parser/verilog.y +++ b/verilog/parser/verilog.y @@ -667,7 +667,7 @@ is not locally defined, so the grammar here uses only generic identifiers. %token TK_COMMENT_BLOCK "/∗comment∗/" %token TK_EOL_COMMENT "// end of line comment" %token TK_SPACE "<<space>>" /* includes tabs */ -%token TK_NEWLINE "<<\\n>>" +%token TK_NEWLINE "<<newline>>" %token TK_LINE_CONT "<<\\line-cont>>" %token TK_ATTRIBUTE "(*attribute*)" @@ -3518,12 +3518,12 @@ instantiation_base { $$ = MakeInstantiationBase($1, $2); } /* * TODO: support mixed anonymous declarations - * + * * This production rule was commented out because it caused * verible-verilog-syntax to crash for some inputs. It may be necessary to * re-enable it in the future to support declarations that mix anonymous and * named instances. - * + * * For more details, see https://github.com/chipsalliance/verible/issues/2181 */ // | reference call_base ',' gate_instance_or_register_variable_list diff --git a/verilog/tools/syntax/verilog_syntax_test.sh b/verilog/tools/syntax/verilog_syntax_test.sh index 1bb357802..8925e438c 100755 --- a/verilog/tools/syntax/verilog_syntax_test.sh +++ b/verilog/tools/syntax/verilog_syntax_test.sh @@ -307,11 +307,9 @@ All lexed tokens: (#"<<space>>" @6-7: " ") (#SymbolIdentifier @7-9: "mm") (#';' @9-10: ";") -(#"<<\\\\n>>" @10-11: " -") +(#"<<newline>>" @10-11: "\\n") (#"endmodule" @11-20: "endmodule") -(#"<<\\\\n>>" @20-21: " -") +(#"<<newline>>" @20-21: "\\n") (#"end of file" @21-21: "") EOF