Skip to content

Commit 516d0fc

Browse files
ianprime0509 authored and richerfu committed
std.zig.tokenizer: simplify line-based tokens
Closes ziglang#21358
Closes ziglang#21360

This commit modifies the `multiline_string_literal_line`, `doc_comment`, and `container_doc_comment` tokens to no longer include the line ending as part of the token. This makes it easier to handle line endings (which may be LF, CRLF, or in edge cases possibly nonexistent) consistently.

In the two issues linked above, Autodoc was already assuming this for doc comments, and yielding incorrect results when handling files with CRLF line endings (both in Markdown parsing and source rendering).

Applying the same simplification for multiline string literals also brings `zig fmt` into conformance with ziglang/zig-spec#38 regarding formatting of multiline strings with CRLF line endings: the spec says that `zig fmt` should remove the CR from such line endings, but this was not previously the case.
1 parent 7437f3b commit 516d0fc

File tree

4 files changed

+40
-11
lines changed

4 files changed

+40
-11
lines changed

lib/std/zig/AstGen.zig

+2-4
Original file line numberDiff line numberDiff line change
@@ -11721,16 +11721,14 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice {
1172111721
var tok_i = start;
1172211722
{
1172311723
const slice = tree.tokenSlice(tok_i);
11724-
const carriage_return_ending: usize = if (slice[slice.len - 2] == '\r') 2 else 1;
11725-
const line_bytes = slice[2 .. slice.len - carriage_return_ending];
11724+
const line_bytes = slice[2..];
1172611725
try string_bytes.appendSlice(gpa, line_bytes);
1172711726
tok_i += 1;
1172811727
}
1172911728
// Following lines: each line prepends a newline.
1173011729
while (tok_i <= end) : (tok_i += 1) {
1173111730
const slice = tree.tokenSlice(tok_i);
11732-
const carriage_return_ending: usize = if (slice[slice.len - 2] == '\r') 2 else 1;
11733-
const line_bytes = slice[2 .. slice.len - carriage_return_ending];
11731+
const line_bytes = slice[2..];
1173411732
try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1);
1173511733
string_bytes.appendAssumeCapacity('\n');
1173611734
string_bytes.appendSliceAssumeCapacity(line_bytes);

lib/std/zig/parser_test.zig

+38
Original file line numberDiff line numberDiff line change
@@ -3087,6 +3087,22 @@ test "zig fmt: multiline string" {
30873087
);
30883088
}
30893089

3090+
test "zig fmt: multiline string with CRLF line endings" {
3091+
try testTransform("" ++
3092+
"const s =\r\n" ++
3093+
" \\\\one\r\n" ++
3094+
" \\\\two)\r\n" ++
3095+
" \\\\three\r\n" ++
3096+
";\r\n",
3097+
\\const s =
3098+
\\ \\one
3099+
\\ \\two)
3100+
\\ \\three
3101+
\\;
3102+
\\
3103+
);
3104+
}
3105+
30903106
test "zig fmt: values" {
30913107
try testCanonical(
30923108
\\test "values" {
@@ -4404,6 +4420,28 @@ test "zig fmt: invalid doc comments on comptime and test blocks" {
44044420
});
44054421
}
44064422

4423+
test "zig fmt: comments with CRLF line endings" {
4424+
try testTransform("" ++
4425+
"//! Top-level doc comment\r\n" ++
4426+
"//! Continuing to another line\r\n" ++
4427+
"\r\n" ++
4428+
"/// Regular doc comment\r\n" ++
4429+
"const S = struct {\r\n" ++
4430+
" // Regular comment\r\n" ++
4431+
" // More content\r\n" ++
4432+
"};\r\n",
4433+
\\//! Top-level doc comment
4434+
\\//! Continuing to another line
4435+
\\
4436+
\\/// Regular doc comment
4437+
\\const S = struct {
4438+
\\ // Regular comment
4439+
\\ // More content
4440+
\\};
4441+
\\
4442+
);
4443+
}
4444+
44074445
test "zig fmt: else comptime expr" {
44084446
try testCanonical(
44094447
\\comptime {

lib/std/zig/render.zig

-3
Original file line numberDiff line numberDiff line change
@@ -3170,9 +3170,6 @@ fn discardAllParams(r: *Render, fn_proto_node: Ast.Node.Index) Error!void {
31703170
fn tokenSliceForRender(tree: Ast, token_index: Ast.TokenIndex) []const u8 {
31713171
var ret = tree.tokenSlice(token_index);
31723172
switch (tree.tokens.items(.tag)[token_index]) {
3173-
.multiline_string_literal_line => {
3174-
if (ret[ret.len - 1] == '\n') ret.len -= 1;
3175-
},
31763173
.container_doc_comment, .doc_comment => {
31773174
ret = mem.trimRight(u8, ret, &std.ascii.whitespace);
31783175
},

lib/std/zig/tokenizer.zig

-4
Original file line numberDiff line numberDiff line change
@@ -847,12 +847,10 @@ pub const Tokenizer = struct {
847847
break;
848848
},
849849
'\n' => {
850-
self.index += 1;
851850
break;
852851
},
853852
'\r' => {
854853
if (self.buffer[self.index + 1] == '\n') {
855-
self.index += 2;
856854
break;
857855
} else {
858856
state = .invalid;
@@ -1117,7 +1115,6 @@ pub const Tokenizer = struct {
11171115
},
11181116
'\r' => {
11191117
if (self.buffer[self.index + 1] == '\n') {
1120-
self.index += 1;
11211118
result.tag = .doc_comment;
11221119
break;
11231120
} else {
@@ -1167,7 +1164,6 @@ pub const Tokenizer = struct {
11671164
},
11681165
'\r' => {
11691166
if (self.buffer[self.index + 1] == '\n') {
1170-
self.index += 1;
11711167
break;
11721168
} else {
11731169
state = .invalid;

0 commit comments

Comments
 (0)