parser/event.rs
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow the tree representation and the
//! parser algorithm to evolve independently.
use std::mem;
use crate::{
    output::Output,
    SyntaxKind::{self, *},
};
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
#[derive(Debug)]
pub(crate) enum Event {
    /// This event signifies the start of the node.
    /// It should be either abandoned (in which case the
    /// `kind` is `TOMBSTONE`, and the event is ignored),
    /// or completed via a `Finish` event.
    ///
    /// All tokens between a `Start` and a `Finish` become
    /// the children of the respective node.
    ///
    /// For left-recursive syntactic constructs, the parser produces
    /// a child node before it sees a parent. `forward_parent`
    /// saves the position of the current event's parent.
    ///
    /// Consider this path
    ///
    /// foo::bar
    ///
    /// The events for it would look like this:
    ///
    /// ```text
    /// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH
    ///       |                          /\
    ///       |                          |
    ///       +------forward-parent------+
    /// ```
    ///
    /// And the tree would look like this
    ///
    /// ```text
    ///    +--PATH---------+
    ///    |   |           |
    ///    |   |           |
    ///    |  '::'       'bar'
    ///    |
    ///   PATH
    ///    |
    ///   'foo'
    /// ```
    ///
    /// See also `CompletedMarker::precede`.
    Start {
        kind: SyntaxKind,
        forward_parent: Option<u32>,
    },
    /// Complete the previous `Start` event
    Finish,
    /// Produce a single leaf-element.
    /// `n_raw_tokens` is used to glue complex contextual tokens.
    /// For example, the lexer tokenizes `>>` as `>`, `>`, and
    /// `n_raw_tokens = 2` is used to produce a single `>>`.
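    ///
    /// Illustrative sketch (the exact glued `SyntaxKind` name is left as a
    /// placeholder here):
    ///
    /// ```text
    /// a >> b
    /// lexer:  IDENT('a') '>' '>' IDENT('b')
    /// parser: Token { kind: <kind for `>>`>, n_raw_tokens: 2 }
    /// ```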
    Token {
        kind: SyntaxKind,
        n_raw_tokens: u8,
    },
    /// When we parse `foo.0.0` or `foo. 0. 0`, the lexer will hand us a float literal
    /// instead of an integer literal followed by a dot, as the lexer has no contextual knowledge.
    /// This event instructs whatever consumes the events to split the float literal into
    /// the corresponding parts.
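    ///
    /// Illustratively, `foo.0.0` is lexed as `IDENT('foo') '.' FLOAT('0.0')`;
    /// the consumer splits the `0.0` float token back into `0`, `.`, `0` so that
    /// both field accesses can be represented in the tree.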
    FloatSplitHack {
        ends_in_dot: bool,
    },
    Error {
        msg: String,
    },
}
impl Event {
    pub(crate) fn tombstone() -> Self {
        Event::Start { kind: TOMBSTONE, forward_parent: None }
    }
}
/// Generate the syntax tree output from the flat list of events.
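///
/// As a rough sketch of the replay (the names match the `Output` methods used
/// below), an event sequence like
///
/// ```text
/// Start { kind: PATH, .. }  Token { kind: IDENT, n_raw_tokens: 1 }  Finish
/// ```
///
/// turns into `enter_node(PATH)`, `token(IDENT, 1)`, `leave_node()` calls.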
pub(super) fn process(mut events: Vec<Event>) -> Output {
    let mut res = Output::default();
    let mut forward_parents = Vec::new();

    for i in 0..events.len() {
        match mem::replace(&mut events[i], Event::tombstone()) {
            Event::Start { kind, forward_parent } => {
                // For events [A, B, C], where B is A's forward_parent and C is B's
                // forward_parent, plain event order would suggest the nesting
                // `A -> B -> C`, but `forward_parent` reverses it to `C <- B <- A`,
                // i.e. `C` becomes the outermost node.
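                // Illustrative example (offsets are relative, as `idx += fwd` below):
                //     events = [Start(A, fp: Some(1)), Start(B, fp: Some(1)), Start(C, fp: None)]
                // collects forward_parents = [A, B, C]; draining it in reverse enters
                // C, then B, then A, so C ends up as the outermost node.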
                // append `A` into parents.
                forward_parents.push(kind);
                let mut idx = i;
                let mut fp = forward_parent;
                while let Some(fwd) = fp {
                    idx += fwd as usize;
                    // append `A`'s forward_parent `B`
                    fp = match mem::replace(&mut events[idx], Event::tombstone()) {
                        Event::Start { kind, forward_parent } => {
                            forward_parents.push(kind);
                            forward_parent
                        }
                        _ => unreachable!(),
                    };
                    // append `B`'s forward_parent `C` in the next stage.
                }

                for kind in forward_parents.drain(..).rev() {
                    if kind != TOMBSTONE {
                        res.enter_node(kind);
                    }
                }
            }
            Event::Finish => res.leave_node(),
            Event::Token { kind, n_raw_tokens } => {
                res.token(kind, n_raw_tokens);
            }
            Event::FloatSplitHack { ends_in_dot } => {
                res.float_split_hack(ends_in_dot);
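                // A `FloatSplitHack` is expected to be immediately followed by a
                // `Finish` event (the assert below checks this); consume it here so
                // it is not replayed as a separate `leave_node`.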
                let ev = mem::replace(&mut events[i + 1], Event::tombstone());
                assert!(matches!(ev, Event::Finish), "{ev:?}");
            }
            Event::Error { msg } => res.error(msg),
        }
    }

    res
}