parser/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
//! The Rust parser.
//!
//! NOTE: The crate is undergoing refactors, don't believe everything the docs
//! say :-)
//!
//! The parser doesn't know about concrete representation of tokens and syntax
//! trees. The abstract [`Input`] and [`Output`] types are used instead. As
//! a consequence, this crate does not contain a lexer.
//!
//! The [`Parser`] struct from the [`parser`] module is a cursor into the
//! sequence of tokens. Parsing routines use [`Parser`] to inspect current
//! state and advance the parsing.
//!
//! The actual parsing happens in the [`grammar`] module.
//!
//! Tests for this crate live in the `syntax` crate.
//!
//! [`Parser`]: crate::parser::Parser
#![allow(rustdoc::private_intra_doc_links)]
#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
#[cfg(not(feature = "in-rust-tree"))]
extern crate ra_ap_rustc_lexer as rustc_lexer;
#[cfg(feature = "in-rust-tree")]
extern crate rustc_lexer;
mod event;
mod grammar;
mod input;
mod lexed_str;
mod output;
mod parser;
mod shortcuts;
mod syntax_kind;
mod token_set;
#[cfg(test)]
mod tests;
pub(crate) use token_set::TokenSet;
pub use edition::Edition;
pub use crate::{
input::Input,
lexed_str::LexedStr,
output::{Output, Step},
shortcuts::StrStep,
syntax_kind::SyntaxKind,
};
/// Parse the whole of the input as a given syntactic construct.
///
/// This covers two main use-cases:
///
/// * Parsing a Rust file.
/// * Parsing a result of macro expansion.
///
/// That is, for something like
///
/// ```
/// quick_check! {
/// fn prop() {}
/// }
/// ```
///
/// the input to the macro will be parsed with [`PrefixEntryPoint::Item`], and
/// the result will be [`TopEntryPoint::MacroItems`].
///
/// [`TopEntryPoint::parse`] makes a guarantee that
/// * all input is consumed
/// * the result is a valid tree (there's one root node)
///
/// The tree-shape half of that guarantee is additionally verified by
/// assertions inside [`TopEntryPoint::parse`] when the crate is built with
/// debug assertions enabled.
///
/// Each variant selects the routine of the same (snake-cased) name in
/// `grammar::entry::top`.
#[derive(Debug)]
pub enum TopEntryPoint {
/// Dispatches to `grammar::entry::top::source_file`.
SourceFile,
/// Dispatches to `grammar::entry::top::macro_stmts`.
MacroStmts,
/// Dispatches to `grammar::entry::top::macro_items`.
MacroItems,
/// Dispatches to `grammar::entry::top::pattern`.
Pattern,
/// Dispatches to `grammar::entry::top::type_`.
Type,
/// Dispatches to `grammar::entry::top::expr`.
Expr,
/// Edge case -- macros generally don't expand to attributes, with the
/// exception of `cfg_attr` which does!
///
/// Dispatches to `grammar::entry::top::meta_item`.
MetaItem,
}
impl TopEntryPoint {
pub fn parse(&self, input: &Input, edition: Edition) -> Output {
let _p = tracing::info_span!("TopEntryPoint::parse", ?self).entered();
let entry_point: fn(&'_ mut parser::Parser<'_>) = match self {
TopEntryPoint::SourceFile => grammar::entry::top::source_file,
TopEntryPoint::MacroStmts => grammar::entry::top::macro_stmts,
TopEntryPoint::MacroItems => grammar::entry::top::macro_items,
TopEntryPoint::Pattern => grammar::entry::top::pattern,
TopEntryPoint::Type => grammar::entry::top::type_,
TopEntryPoint::Expr => grammar::entry::top::expr,
TopEntryPoint::MetaItem => grammar::entry::top::meta_item,
};
let mut p = parser::Parser::new(input, edition);
entry_point(&mut p);
let events = p.finish();
let res = event::process(events);
if cfg!(debug_assertions) {
let mut depth = 0;
let mut first = true;
for step in res.iter() {
assert!(depth > 0 || first);
first = false;
match step {
Step::Enter { .. } => depth += 1,
Step::Exit => depth -= 1,
Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
depth -= 1 + !has_pseudo_dot as usize
}
Step::Token { .. } | Step::Error { .. } => (),
}
}
assert!(!first, "no tree at all");
assert_eq!(depth, 0, "unbalanced tree");
}
res
}
}
/// Parse a prefix of the input as a given syntactic construct.
///
/// This is used by macro-by-example parser to implement things like `$i:item`
/// and the naming of variants follows the naming of macro fragments.
///
/// Note that this is generally non-optional -- the result is intentionally not
/// `Option<Output>`. The way MBE works, by the time we *try* to parse `$e:expr`
/// we already commit to expression. In other words, this API by design can't be
/// used to implement "rollback and try another alternative" logic.
///
/// Each variant selects the routine of the same (snake-cased) name in
/// `grammar::entry::prefix`.
#[derive(Debug)]
pub enum PrefixEntryPoint {
/// Dispatches to `grammar::entry::prefix::vis`.
Vis,
/// Dispatches to `grammar::entry::prefix::block`.
Block,
/// Dispatches to `grammar::entry::prefix::stmt`.
Stmt,
/// Dispatches to `grammar::entry::prefix::pat`.
Pat,
/// Dispatches to `grammar::entry::prefix::pat_top`.
PatTop,
/// Dispatches to `grammar::entry::prefix::ty`.
Ty,
/// Dispatches to `grammar::entry::prefix::expr`.
Expr,
/// Dispatches to `grammar::entry::prefix::path`.
Path,
/// Dispatches to `grammar::entry::prefix::item`.
Item,
/// Dispatches to `grammar::entry::prefix::meta_item`.
MetaItem,
}
impl PrefixEntryPoint {
    /// Parses a prefix of `input` as the construct selected by `self`.
    ///
    /// The variant chooses one of the `grammar::entry::prefix` routines, which
    /// is run against a freshly created `parser::Parser`. The events the
    /// parser hands back are converted into an [`Output`] by `event::process`.
    pub fn parse(&self, input: &Input, edition: Edition) -> Output {
        // The annotation coerces each distinct function item to one pointer type.
        let run: fn(&mut parser::Parser<'_>) = match self {
            Self::Vis => grammar::entry::prefix::vis,
            Self::Block => grammar::entry::prefix::block,
            Self::Stmt => grammar::entry::prefix::stmt,
            Self::Pat => grammar::entry::prefix::pat,
            Self::PatTop => grammar::entry::prefix::pat_top,
            Self::Ty => grammar::entry::prefix::ty,
            Self::Expr => grammar::entry::prefix::expr,
            Self::Path => grammar::entry::prefix::path,
            Self::Item => grammar::entry::prefix::item,
            Self::MetaItem => grammar::entry::prefix::meta_item,
        };

        let mut cursor = parser::Parser::new(input, edition);
        run(&mut cursor);
        event::process(cursor.finish())
    }
}
/// A parsing function for a specific braced-block.
///
/// This is a thin wrapper around a plain function pointer that drives a
/// `parser::Parser`. Values are obtained from [`Reparser::for_node`] and
/// consumed by [`Reparser::parse`].
pub struct Reparser(fn(&mut parser::Parser<'_>));
impl Reparser {
/// If the node is a braced block, return the corresponding `Reparser`.
pub fn for_node(
node: SyntaxKind,
first_child: Option<SyntaxKind>,
parent: Option<SyntaxKind>,
) -> Option<Reparser> {
grammar::reparser(node, first_child, parent).map(Reparser)
}
/// Re-parse given tokens using this `Reparser`.
///
/// Tokens must start with `{`, end with `}` and form a valid brace
/// sequence.
pub fn parse(self, tokens: &Input, edition: Edition) -> Output {
let Reparser(r) = self;
let mut p = parser::Parser::new(tokens, edition);
r(&mut p);
let events = p.finish();
event::process(events)
}
}