Skip to content

Commit 70a2a0f

Browse files
committed
Implement RFC 3503: frontmatters
Supersedes #137193
1 parent 0c33fe2 commit 70a2a0f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+958
-22
lines changed

compiler/rustc_ast_passes/src/feature_gate.rs

+1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
514514
gate_all!(contracts_internals, "contract internal machinery is for internal use only");
515515
gate_all!(where_clause_attrs, "attributes in `where` clause are unstable");
516516
gate_all!(super_let, "`super let` is experimental");
517+
gate_all!(frontmatter, "frontmatters are experimental");
517518

518519
if !visitor.features.never_patterns() {
519520
if let Some(spans) = spans.get(&sym::never_patterns) {

compiler/rustc_feature/src/unstable.rs

+2
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,8 @@ declare_features! (
506506
(incomplete, fn_delegation, "1.76.0", Some(118212)),
507507
/// Allows impls for the Freeze trait.
508508
(internal, freeze_impls, "1.78.0", Some(121675)),
509+
/// Frontmatter `---` blocks for use by external tools.
510+
(unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)),
509511
/// Allows defining gen blocks and `gen fn`.
510512
(unstable, gen_blocks, "1.75.0", Some(117078)),
511513
/// Infer generic args for both consts and types.

compiler/rustc_lexer/src/cursor.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
use std::str::Chars;
22

3+
/// Whether the cursor is still permitted to lex a frontmatter block.
///
/// A [`Cursor`] created with `Yes` attempts frontmatter lexing until it has
/// produced its first non-whitespace token, after which it switches itself to `No`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    /// A frontmatter block may still start at the current position.
    Yes,
    /// Frontmatter is not recognized; `---` is lexed as ordinary tokens.
    No,
}
7+
38
/// Peekable iterator over a char sequence.
49
///
510
/// Next characters can be peeked via `first` method,
@@ -8,17 +13,19 @@ pub struct Cursor<'a> {
813
len_remaining: usize,
914
/// Iterator over chars. Slightly faster than a &str.
1015
chars: Chars<'a>,
16+
pub(crate) frontmatter_allowed: FrontmatterAllowed,
1117
#[cfg(debug_assertions)]
1218
prev: char,
1319
}
1420

1521
pub(crate) const EOF_CHAR: char = '\0';
1622

1723
impl<'a> Cursor<'a> {
18-
pub fn new(input: &'a str) -> Cursor<'a> {
24+
pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
1925
Cursor {
2026
len_remaining: input.len(),
2127
chars: input.chars(),
28+
frontmatter_allowed,
2229
#[cfg(debug_assertions)]
2330
prev: EOF_CHAR,
2431
}
@@ -95,6 +102,11 @@ impl<'a> Cursor<'a> {
95102
Some(c)
96103
}
97104

105+
/// Moves to a substring by a number of bytes.
106+
pub(crate) fn bump_bytes(&mut self, n: usize) {
107+
self.chars = self.as_str()[n..].chars();
108+
}
109+
98110
/// Eats symbols while predicate returns true or until the end of file is reached.
99111
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
100112
// It was tried making optimized version of this for eg. line comments, but

compiler/rustc_lexer/src/lib.rs

+145-7
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
3535

3636
use self::LiteralKind::*;
3737
use self::TokenKind::*;
38-
pub use crate::cursor::Cursor;
3938
use crate::cursor::EOF_CHAR;
39+
pub use crate::cursor::{Cursor, FrontmatterAllowed};
4040

4141
/// Parsed token.
4242
/// It doesn't contain information about data that has been parsed,
@@ -57,17 +57,27 @@ impl Token {
5757
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
5858
pub enum TokenKind {
5959
/// A line comment, e.g. `// comment`.
60-
LineComment { doc_style: Option<DocStyle> },
60+
LineComment {
61+
doc_style: Option<DocStyle>,
62+
},
6163

6264
/// A block comment, e.g. `/* block comment */`.
6365
///
6466
/// Block comments can be recursive, so a sequence like `/* /* */`
6567
/// will not be considered terminated and will result in a parsing error.
66-
BlockComment { doc_style: Option<DocStyle>, terminated: bool },
68+
BlockComment {
69+
doc_style: Option<DocStyle>,
70+
terminated: bool,
71+
},
6772

6873
/// Any whitespace character sequence.
6974
Whitespace,
7075

76+
Frontmatter {
77+
has_invalid_preceding_whitespace: bool,
78+
invalid_infostring: bool,
79+
},
80+
7181
/// An identifier or keyword, e.g. `ident` or `continue`.
7282
Ident,
7383

@@ -109,10 +119,15 @@ pub enum TokenKind {
109119
/// this type will need to check for and reject that case.
110120
///
111121
/// See [LiteralKind] for more details.
112-
Literal { kind: LiteralKind, suffix_start: u32 },
122+
Literal {
123+
kind: LiteralKind,
124+
suffix_start: u32,
125+
},
113126

114127
/// A lifetime, e.g. `'a`.
115-
Lifetime { starts_with_number: bool },
128+
Lifetime {
129+
starts_with_number: bool,
130+
},
116131

117132
/// `;`
118133
Semi,
@@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
280295
#[inline]
281296
pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
282297
debug_assert!(!input.is_empty());
283-
let mut cursor = Cursor::new(input);
298+
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
284299
// Move past the leading `r` or `br`.
285300
for _ in 0..prefix_len {
286301
cursor.bump().unwrap();
@@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
290305

291306
/// Creates an iterator that produces tokens from the input string.
292307
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
293-
let mut cursor = Cursor::new(input);
308+
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
294309
std::iter::from_fn(move || {
295310
let token = cursor.advance_token();
296311
if token.kind != TokenKind::Eof { Some(token) } else { None }
@@ -361,7 +376,34 @@ impl Cursor<'_> {
361376
Some(c) => c,
362377
None => return Token::new(TokenKind::Eof, 0),
363378
};
379+
364380
let token_kind = match first_char {
381+
c if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
382+
&& is_whitespace(c) =>
383+
{
384+
let mut last = first_char;
385+
while is_whitespace(self.first()) {
386+
let Some(c) = self.bump() else {
387+
break;
388+
};
389+
last = c;
390+
}
391+
// invalid frontmatter opening as whitespace preceding it isn't newline.
392+
// combine the whitespace and the frontmatter to a single token as we shall
393+
// error later.
394+
if last != '\n' && self.as_str().starts_with("---") {
395+
self.bump();
396+
self.frontmatter(true)
397+
} else {
398+
Whitespace
399+
}
400+
}
401+
'-' if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
402+
&& self.as_str().starts_with("--") =>
403+
{
404+
// happy path
405+
self.frontmatter(false)
406+
}
365407
// Slash, comment or block comment.
366408
'/' => match self.first() {
367409
'/' => self.line_comment(),
@@ -464,11 +506,107 @@ impl Cursor<'_> {
464506
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
465507
_ => Unknown,
466508
};
509+
if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
510+
&& !matches!(token_kind, Whitespace)
511+
{
512+
// stop allowing frontmatters after first non-whitespace token
513+
self.frontmatter_allowed = FrontmatterAllowed::No;
514+
}
467515
let res = Token::new(token_kind, self.pos_within_token());
468516
self.reset_pos_within_token();
469517
res
470518
}
471519

520+
/// Given that one `-` was eaten, eat the rest of the frontmatter.
521+
fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
522+
debug_assert_eq!('-', self.prev());
523+
524+
let pos = self.pos_within_token();
525+
self.eat_while(|c| c == '-');
526+
527+
// one `-` is eaten by the caller.
528+
let length_opening = self.pos_within_token() - pos + 1;
529+
530+
// must be ensured by the caller
531+
debug_assert!(length_opening >= 3);
532+
533+
// copied from `eat_identifier`, but allows `.` in infostring to allow something like
534+
// `---Cargo.toml` as a valid opener
535+
if is_id_start(self.first()) {
536+
self.bump();
537+
self.eat_while(|c| is_id_continue(c) || c == '.');
538+
}
539+
540+
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
541+
let invalid_infostring = self.first() != '\n';
542+
543+
let mut s = self.as_str();
544+
let mut found = false;
545+
while let Some(closing) = s.find(&"-".repeat(length_opening as usize)) {
546+
let preceding_chars_start = s[..closing].rfind("\n").map_or(0, |i| i + 1);
547+
if s[preceding_chars_start..closing].chars().all(is_whitespace) {
548+
// candidate found
549+
self.bump_bytes(closing);
550+
// in case like
551+
// ---cargo
552+
// --- blahblah
553+
// or
554+
// ---cargo
555+
// ----
556+
// combine those stuff into this frontmatter token such that it gets detected later.
557+
self.eat_until(b'\n');
558+
found = true;
559+
break;
560+
} else {
561+
s = &s[closing + length_opening as usize..];
562+
}
563+
}
564+
565+
if !found {
566+
// recovery strategy: a closing statement might have precending whitespace/newline
567+
// but not have enough dashes to properly close. In this case, we eat until there,
568+
// and report a mismatch in the parser.
569+
let mut rest = self.as_str();
570+
// We can look for a shorter closing (starting with four dashes but closing with three)
571+
// and other indications that Rust has started and the infostring has ended.
572+
let mut potential_closing = rest
573+
.find("\n---")
574+
// n.b. only in the case where there are dashes, we move the index to the line where
575+
// the dashes start as we eat to include that line. For other cases those are Rust code
576+
// and not included in the frontmatter.
577+
.map(|x| x + 1)
578+
.or_else(|| rest.find("\nuse"))
579+
.or_else(|| rest.find("\n//!"))
580+
.or_else(|| rest.find("\n#!["));
581+
582+
if potential_closing.is_none() {
583+
// a less fortunate recovery if all else fails which finds any dashes preceded by whitespace
584+
// on a standalone line. Might be wrong.
585+
while let Some(closing) = rest.find("---") {
586+
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
587+
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
588+
// candidate found
589+
potential_closing = Some(closing);
590+
break;
591+
} else {
592+
rest = &rest[closing + 3..];
593+
}
594+
}
595+
}
596+
597+
if let Some(potential_closing) = potential_closing {
598+
// bump to the potential closing, and eat everything on that line.
599+
self.bump_bytes(potential_closing);
600+
self.eat_until(b'\n');
601+
} else {
602+
// eat everything. this will get reported as an unclosed frontmatter.
603+
self.eat_while(|_| true);
604+
}
605+
}
606+
607+
Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
608+
}
609+
472610
fn line_comment(&mut self) -> TokenKind {
473611
debug_assert!(self.prev() == '/' && self.first() == '/');
474612
self.bump();

compiler/rustc_lexer/src/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use super::*;
44

55
fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
66
let s = &format!("r{}", s);
7-
let mut cursor = Cursor::new(s);
7+
let mut cursor = Cursor::new(s, FrontmatterAllowed::No);
88
cursor.bump();
99
let res = cursor.raw_double_quoted_string(0);
1010
assert_eq!(res, expected);

compiler/rustc_parse/messages.ftl

+13
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,19 @@ parse_forgot_paren = perhaps you forgot parentheses?
297297
parse_found_expr_would_be_stmt = expected expression, found `{$token}`
298298
.label = expected expression
299299
300+
parse_frontmatter_extra_characters_after_close = extra characters after frontmatter close are not allowed
301+
parse_frontmatter_invalid_close_preceding_whitespace = invalid preceding whitespace for frontmatter close
302+
.note = frontmatter close should not be preceded by whitespace
303+
parse_frontmatter_invalid_infostring = invalid infostring for frontmatter
304+
.note = frontmatter infostrings must be a single identifier immediately following the opening
305+
parse_frontmatter_invalid_opening_preceding_whitespace = invalid preceding whitespace for frontmatter opening
306+
.note = frontmatter opening should not be preceded by whitespace
307+
parse_frontmatter_length_mismatch = frontmatter close does not match the opening
308+
.label_opening = the opening here has {$len_opening} dashes...
309+
.label_close = ...while the close has {$len_close} dashes
310+
parse_frontmatter_unclosed = unclosed frontmatter
311+
.note = frontmatter opening here was not closed
312+
300313
parse_function_body_equals_expr = function body cannot be `= expression;`
301314
.suggestion = surround the expression with `{"{"}` and `{"}"}` instead of `=` and `;`
302315

compiler/rustc_parse/src/errors.rs

+55
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,61 @@ pub(crate) struct FoundExprWouldBeStmt {
735735
pub suggestion: ExprParenthesesNeeded,
736736
}
737737

738+
/// Diagnostic for `parse_frontmatter_extra_characters_after_close`:
/// "extra characters after frontmatter close are not allowed".
#[derive(Diagnostic)]
739+
#[diag(parse_frontmatter_extra_characters_after_close)]
740+
pub(crate) struct FrontmatterExtraCharactersAfterClose {
741+
#[primary_span]
742+
pub span: Span,
743+
}
744+
745+
#[derive(Diagnostic)]
746+
#[diag(parse_frontmatter_invalid_infostring)]
747+
#[note]
748+
pub(crate) struct FrontmatterInvalidInfostring {
749+
#[primary_span]
750+
pub span: Span,
751+
}
752+
753+
/// Diagnostic for `parse_frontmatter_invalid_opening_preceding_whitespace`:
/// "invalid preceding whitespace for frontmatter opening". The note is attached to
/// `note_span`.
#[derive(Diagnostic)]
754+
#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
755+
pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
756+
#[primary_span]
757+
pub span: Span,
758+
#[note]
759+
pub note_span: Span,
760+
}
761+
762+
#[derive(Diagnostic)]
763+
#[diag(parse_frontmatter_unclosed)]
764+
pub(crate) struct FrontmatterUnclosed {
765+
#[primary_span]
766+
pub span: Span,
767+
#[note]
768+
pub note_span: Span,
769+
}
770+
771+
/// Diagnostic for `parse_frontmatter_invalid_close_preceding_whitespace`:
/// "invalid preceding whitespace for frontmatter close". The note is attached to
/// `note_span`.
#[derive(Diagnostic)]
772+
#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
773+
pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
774+
#[primary_span]
775+
pub span: Span,
776+
#[note]
777+
pub note_span: Span,
778+
}
779+
780+
/// Diagnostic for `parse_frontmatter_length_mismatch`:
/// "frontmatter close does not match the opening". `opening` and `close` receive labels,
/// and `len_opening` / `len_close` fill the `{$len_opening}` / `{$len_close}` dash counts
/// in the label messages.
#[derive(Diagnostic)]
781+
#[diag(parse_frontmatter_length_mismatch)]
782+
pub(crate) struct FrontmatterLengthMismatch {
783+
#[primary_span]
784+
pub span: Span,
785+
#[label(parse_label_opening)]
786+
pub opening: Span,
787+
#[label(parse_label_close)]
788+
pub close: Span,
789+
pub len_opening: usize,
790+
pub len_close: usize,
791+
}
792+
738793
#[derive(Diagnostic)]
739794
#[diag(parse_leading_plus_not_supported)]
740795
pub(crate) struct LeadingPlusNotSupported {

0 commit comments

Comments
 (0)