Skip to content

Commit 75c8844

Browse files
committed
designate doc comments
1 parent eeb0702 commit 75c8844

File tree

3 files changed

+173
-10
lines changed

3 files changed

+173
-10
lines changed

corpus/source_files.txt

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,86 @@ Line comments
5555
(source_file
5656
(line_comment))
5757

58+
============================================
59+
Doc comments
60+
============================================
61+
62+
/// Doc
63+
/// Comment
64+
// / Now a line comment (note the space separating the third slash)
65+
66+
/// Doc
67+
/// Comment
68+
//// Four slashes make the line a normal comment
69+
/// Doc comment got interrupted by the line above
70+
71+
//! Inner doc comment line 1
72+
//! Inner doc comment line 2
73+
/// This is a different doc comment since the line starts differently
74+
//! Back to inner doc comment
75+
76+
/// Same doc comment
77+
/// But with arbitrary indentation
78+
79+
/// Different doc comments
80+
81+
/// Are separated by newlines
82+
83+
----
84+
85+
(source_file
86+
(doc_comment)
87+
(line_comment)
88+
(doc_comment)
89+
(line_comment)
90+
(doc_comment)
91+
(doc_comment)
92+
(doc_comment)
93+
(doc_comment)
94+
(doc_comment)
95+
(doc_comment)
96+
(doc_comment))
97+
98+
=================================================
99+
Doc comments recursion guard 1 (should not hang)
100+
=================================================
101+
102+
//!
103+
104+
---
105+
106+
(source_file (doc_comment))
107+
108+
=================================================
109+
Doc comments recursion guard 2 (should not hang)
110+
=================================================
111+
112+
///
113+
114+
---
115+
116+
(source_file (doc_comment))
117+
118+
=================================================
119+
Comments recursion guard 1 (should not hang)
120+
=================================================
121+
122+
//
123+
124+
---
125+
126+
(source_file (line_comment))
127+
128+
=================================================
129+
Block comments recursion guard 1 (should not hang)
130+
=================================================
131+
132+
/*
133+
134+
---
135+
136+
(source_file (ERROR))
137+
58138
=====================================
59139
Greek letters in identifiers
60140
=====================================

grammar.js

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ const primitive_types = numeric_types.concat(['bool', 'str', 'char'])
3838
module.exports = grammar({
3939
name: 'rust',
4040

41-
extras: $ => [/\s/, $.line_comment, $.block_comment],
41+
extras: $ => [/\s/, $.line_comment, $.block_comment, $.doc_comment],
4242

4343
externals: $ => [
4444
$._string_content,
4545
$.raw_string_literal,
4646
$.float_literal,
4747
$.block_comment,
48+
$.line_comment,
49+
$.doc_comment
4850
],
4951

5052
supertypes: $ => [
@@ -1426,15 +1428,6 @@ module.exports = grammar({
14261428

14271429
boolean_literal: $ => choice('true', 'false'),
14281430

1429-
comment: $ => choice(
1430-
$.line_comment,
1431-
$.block_comment
1432-
),
1433-
1434-
line_comment: $ => token(seq(
1435-
'//', /.*/
1436-
)),
1437-
14381431
_path: $ => choice(
14391432
$.self,
14401433
alias(choice(...primitive_types), $.identifier),

src/scanner.c

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ enum TokenType {
66
RAW_STRING_LITERAL,
77
FLOAT_LITERAL,
88
BLOCK_COMMENT,
9+
LINE_COMMENT,
10+
DOC_COMMENT,
911
};
1012

1113
void *tree_sitter_rust_external_scanner_create() { return NULL; }
@@ -143,7 +145,95 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer,
143145

144146
if (lexer->lookahead == '/') {
145147
advance(lexer);
148+
149+
if ((valid_symbols[LINE_COMMENT] || valid_symbols[DOC_COMMENT]) && lexer->lookahead == '/') {
150+
advance(lexer);
151+
152+
bool started_with_slash = lexer->lookahead == '/';
153+
switch (lexer->lookahead) {
154+
case '!':
155+
case '/': {
156+
advance(lexer);
157+
158+
// If three consecutive slashes were seen and this is the fourth one,
159+
// the line turns back to a normal comment.
160+
// The above rule does not apply to "//!", which is also a doc
161+
// comment; that is why started_with_slash has to be tracked.
162+
if (started_with_slash == false || lexer->lookahead != '/') {
163+
lexer->result_symbol = DOC_COMMENT;
164+
165+
while (true) {
166+
while (true) {
167+
switch (lexer->lookahead) {
168+
case '\n': {
169+
lexer->mark_end(lexer);
170+
advance(lexer);
171+
goto finished_doc_comment_line;
172+
}
173+
case 0: {
174+
goto doc_comment_exit;
175+
}
176+
default: {
177+
advance(lexer);
178+
}
179+
}
180+
}
181+
182+
finished_doc_comment_line:
183+
184+
while (isblank(lexer->lookahead)) lexer->advance(lexer, false);
185+
186+
if (lexer->lookahead == '\n') {
187+
// Even if there's another comment ahead, it'll be part of a
188+
// separate node. Break here.
189+
break;
190+
}
191+
192+
if (lexer->lookahead == '/') {
193+
advance(lexer);
194+
if (lexer->lookahead == '/') {
195+
advance(lexer);
196+
if (started_with_slash) {
197+
if (lexer->lookahead == '/') {
198+
advance(lexer);
199+
// If a fourth slash is found, the line turns back to a normal comment
200+
if (lexer->lookahead == '/') {
201+
break;
202+
}
203+
} else {
204+
break;
205+
}
206+
} else if (lexer->lookahead != '!') {
207+
break;
208+
}
209+
} else {
210+
break;
211+
}
212+
} else {
213+
break;
214+
}
215+
}
216+
}
217+
218+
break;
219+
}
220+
}
221+
222+
doc_comment_exit:
223+
224+
// Might have already processed a doc comment in the loop above
225+
if (lexer->result_symbol != DOC_COMMENT) {
226+
lexer->result_symbol = LINE_COMMENT;
227+
while (lexer->lookahead != '\n' && lexer->lookahead != 0) {
228+
advance(lexer);
229+
}
230+
}
231+
232+
return true;
233+
}
234+
146235
if (lexer->lookahead != '*') return false;
236+
147237
advance(lexer);
148238

149239
bool after_star = false;

0 commit comments

Comments
 (0)