parser/
parser.rs

1//! See [`Parser`].
2
3use std::cell::Cell;
4
5use drop_bomb::DropBomb;
6
7use crate::{
8    Edition,
9    SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
10    T, TokenSet,
11    event::Event,
12    input::Input,
13};
14
15/// `Parser` struct provides the low-level API for
16/// navigating through the stream of tokens and
17/// constructing the parse tree. The actual parsing
18/// happens in the [`grammar`](super::grammar) module.
19///
20/// However, the result of this `Parser` is not a real
21/// tree, but rather a flat stream of events of the form
22/// "start expression, consume number literal,
23/// finish expression". See `Event` docs for more.
24pub(crate) struct Parser<'t> {
25    inp: &'t Input,
26    pos: usize,
27    events: Vec<Event>,
28    steps: Cell<u32>,
29}
30
31const PARSER_STEP_LIMIT: usize = if cfg!(debug_assertions) { 150_000 } else { 15_000_000 };
32
33impl<'t> Parser<'t> {
34    pub(super) fn new(inp: &'t Input) -> Parser<'t> {
35        Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) }
36    }
37
38    pub(crate) fn finish(self) -> Vec<Event> {
39        self.events
40    }
41
42    /// Returns the kind of the current token.
43    /// If parser has already reached the end of input,
44    /// the special `EOF` kind is returned.
45    pub(crate) fn current(&self) -> SyntaxKind {
46        self.nth(0)
47    }
48
49    /// Lookahead operation: returns the kind of the next nth
50    /// token.
51    pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
52        assert!(n <= 3);
53
54        let steps = self.steps.get();
55        assert!((steps as usize) < PARSER_STEP_LIMIT, "the parser seems stuck");
56        self.steps.set(steps + 1);
57
58        self.inp.kind(self.pos + n)
59    }
60
61    /// Checks if the current token is `kind`.
62    pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
63        self.nth_at(0, kind)
64    }
65
66    pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
67        match kind {
68            T![-=] => self.at_composite2(n, T![-], T![=]),
69            T![->] => self.at_composite2(n, T![-], T![>]),
70            T![::] => self.at_composite2(n, T![:], T![:]),
71            T![!=] => self.at_composite2(n, T![!], T![=]),
72            T![..] => self.at_composite2(n, T![.], T![.]),
73            T![*=] => self.at_composite2(n, T![*], T![=]),
74            T![/=] => self.at_composite2(n, T![/], T![=]),
75            T![&&] => self.at_composite2(n, T![&], T![&]),
76            T![&=] => self.at_composite2(n, T![&], T![=]),
77            T![%=] => self.at_composite2(n, T![%], T![=]),
78            T![^=] => self.at_composite2(n, T![^], T![=]),
79            T![+=] => self.at_composite2(n, T![+], T![=]),
80            T![<<] => self.at_composite2(n, T![<], T![<]),
81            T![<=] => self.at_composite2(n, T![<], T![=]),
82            T![==] => self.at_composite2(n, T![=], T![=]),
83            T![=>] => self.at_composite2(n, T![=], T![>]),
84            T![>=] => self.at_composite2(n, T![>], T![=]),
85            T![>>] => self.at_composite2(n, T![>], T![>]),
86            T![|=] => self.at_composite2(n, T![|], T![=]),
87            T![||] => self.at_composite2(n, T![|], T![|]),
88
89            T![...] => self.at_composite3(n, T![.], T![.], T![.]),
90            T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
91            T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
92            T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
93
94            _ => self.inp.kind(self.pos + n) == kind,
95        }
96    }
97
98    /// Consume the next token if `kind` matches.
99    pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
100        if !self.at(kind) {
101            return false;
102        }
103        let n_raw_tokens = match kind {
104            T![-=]
105            | T![->]
106            | T![::]
107            | T![!=]
108            | T![..]
109            | T![*=]
110            | T![/=]
111            | T![&&]
112            | T![&=]
113            | T![%=]
114            | T![^=]
115            | T![+=]
116            | T![<<]
117            | T![<=]
118            | T![==]
119            | T![=>]
120            | T![>=]
121            | T![>>]
122            | T![|=]
123            | T![||] => 2,
124
125            T![...] | T![..=] | T![<<=] | T![>>=] => 3,
126            _ => 1,
127        };
128        self.do_bump(kind, n_raw_tokens);
129        true
130    }
131
132    pub(crate) fn eat_contextual_kw(&mut self, kind: SyntaxKind) -> bool {
133        if !self.at_contextual_kw(kind) {
134            return false;
135        }
136        self.bump_remap(kind);
137        true
138    }
139
140    fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
141        self.inp.kind(self.pos + n) == k1
142            && self.inp.kind(self.pos + n + 1) == k2
143            && self.inp.is_joint(self.pos + n)
144    }
145
146    fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
147        self.inp.kind(self.pos + n) == k1
148            && self.inp.kind(self.pos + n + 1) == k2
149            && self.inp.kind(self.pos + n + 2) == k3
150            && self.inp.is_joint(self.pos + n)
151            && self.inp.is_joint(self.pos + n + 1)
152    }
153
154    /// Checks if the current token is in `kinds`.
155    pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
156        kinds.contains(self.current())
157    }
158
159    /// Checks if the current token is contextual keyword `kw`.
160    pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
161        self.inp.contextual_kind(self.pos) == kw
162    }
163
164    /// Checks if the nth token is contextual keyword `kw`.
165    pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool {
166        self.inp.contextual_kind(self.pos + n) == kw
167    }
168
169    /// Starts a new node in the syntax tree. All nodes and tokens
170    /// consumed between the `start` and the corresponding `Marker::complete`
171    /// belong to the same node.
172    pub(crate) fn start(&mut self) -> Marker {
173        let pos = self.events.len() as u32;
174        self.push_event(Event::tombstone());
175        Marker::new(pos)
176    }
177
178    /// Consume the next token. Panics if the parser isn't currently at `kind`.
179    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
180        assert!(self.eat(kind));
181    }
182
183    /// Advances the parser by one token
184    pub(crate) fn bump_any(&mut self) {
185        let kind = self.nth(0);
186        if kind == EOF {
187            return;
188        }
189        self.do_bump(kind, 1);
190    }
191
192    /// Advances the parser by one token
193    pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) {
194        assert!(self.at(SyntaxKind::FLOAT_NUMBER));
195        // we have parse `<something>.`
196        // `<something>`.0.1
197        // here we need to insert an extra event
198        //
199        // `<something>`. 0. 1;
200        // here we need to change the follow up parse, the return value will cause us to emulate a dot
201        // the actual splitting happens later
202        let ends_in_dot = !self.inp.is_joint(self.pos);
203        if !ends_in_dot {
204            let new_marker = self.start();
205            let idx = marker.pos as usize;
206            match &mut self.events[idx] {
207                Event::Start { forward_parent, kind } => {
208                    *kind = SyntaxKind::FIELD_EXPR;
209                    *forward_parent = Some(new_marker.pos - marker.pos);
210                }
211                _ => unreachable!(),
212            }
213            marker.bomb.defuse();
214            marker = new_marker;
215        };
216        self.pos += 1;
217        self.push_event(Event::FloatSplitHack { ends_in_dot });
218        (ends_in_dot, marker)
219    }
220
221    /// Advances the parser by one token, remapping its kind.
222    /// This is useful to create contextual keywords from
223    /// identifiers. For example, the lexer creates a `union`
224    /// *identifier* token, but the parser remaps it to the
225    /// `union` keyword, and keyword is what ends up in the
226    /// final tree.
227    pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
228        if self.nth(0) == EOF {
229            // FIXME: panic!?
230            return;
231        }
232        self.do_bump(kind, 1);
233    }
234
235    /// Emit error with the `message`
236    /// FIXME: this should be much more fancy and support
237    /// structured errors with spans and notes, like rustc
238    /// does.
239    pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
240        let msg = message.into();
241        self.push_event(Event::Error { msg });
242    }
243
244    /// Consume the next token if it is `kind` or emit an error
245    /// otherwise.
246    pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
247        if self.eat(kind) {
248            return true;
249        }
250        self.error(format!("expected {kind:?}"));
251        false
252    }
253
254    /// Create an error node and consume the next token.
255    pub(crate) fn err_and_bump(&mut self, message: &str) {
256        let m = self.start();
257        self.error(message);
258        self.bump_any();
259        m.complete(self, ERROR);
260    }
261
262    /// Create an error node and consume the next token unless it is in the recovery set.
263    ///
264    /// Returns true if recovery kicked in.
265    pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) -> bool {
266        if matches!(self.current(), T!['{'] | T!['}']) {
267            self.error(message);
268            return true;
269        }
270
271        if self.at_ts(recovery) {
272            self.error(message);
273            return true;
274        }
275
276        let m = self.start();
277        self.error(message);
278        self.bump_any();
279        m.complete(self, ERROR);
280        false
281    }
282
283    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
284        self.pos += n_raw_tokens as usize;
285        self.steps.set(0);
286        self.push_event(Event::Token { kind, n_raw_tokens });
287    }
288
289    fn push_event(&mut self, event: Event) {
290        self.events.push(event);
291    }
292
293    pub(crate) fn current_edition(&self) -> Edition {
294        self.inp.edition(self.pos)
295    }
296}
297
298/// See [`Parser::start`].
299pub(crate) struct Marker {
300    pos: u32,
301    bomb: DropBomb,
302}
303
304impl Marker {
305    fn new(pos: u32) -> Marker {
306        Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
307    }
308
309    /// Finishes the syntax tree node and assigns `kind` to it,
310    /// and mark the create a `CompletedMarker` for possible future
311    /// operation like `.precede()` to deal with forward_parent.
312    pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
313        self.bomb.defuse();
314        let idx = self.pos as usize;
315        match &mut p.events[idx] {
316            Event::Start { kind: slot, .. } => {
317                *slot = kind;
318            }
319            _ => unreachable!(),
320        }
321        p.push_event(Event::Finish);
322        let end_pos = p.events.len() as u32;
323        CompletedMarker::new(self.pos, end_pos, kind)
324    }
325
326    /// Abandons the syntax tree node. All its children
327    /// are attached to its parent instead.
328    pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
329        self.bomb.defuse();
330        let idx = self.pos as usize;
331        if idx == p.events.len() - 1 {
332            assert!(matches!(
333                p.events.pop(),
334                Some(Event::Start { kind: TOMBSTONE, forward_parent: None })
335            ));
336        }
337    }
338}
339
340pub(crate) struct CompletedMarker {
341    start_pos: u32,
342    end_pos: u32,
343    kind: SyntaxKind,
344}
345
346impl CompletedMarker {
347    fn new(start_pos: u32, end_pos: u32, kind: SyntaxKind) -> Self {
348        CompletedMarker { start_pos, end_pos, kind }
349    }
350
351    /// This method allows to create a new node which starts
352    /// *before* the current one. That is, parser could start
353    /// node `A`, then complete it, and then after parsing the
354    /// whole `A`, decide that it should have started some node
355    /// `B` before starting `A`. `precede` allows to do exactly
356    /// that. See also docs about
357    /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
358    ///
359    /// Given completed events `[START, FINISH]` and its corresponding
360    /// `CompletedMarker(pos: 0, _)`.
361    /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
362    /// then mark `NEWSTART` as `START`'s parent with saving its relative
363    /// distance to `NEWSTART` into forward_parent(=2 in this case);
364    pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
365        let new_pos = p.start();
366        let idx = self.start_pos as usize;
367        match &mut p.events[idx] {
368            Event::Start { forward_parent, .. } => {
369                *forward_parent = Some(new_pos.pos - self.start_pos);
370            }
371            _ => unreachable!(),
372        }
373        new_pos
374    }
375
376    /// Extends this completed marker *to the left* up to `m`.
377    pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
378        m.bomb.defuse();
379        let idx = m.pos as usize;
380        match &mut p.events[idx] {
381            Event::Start { forward_parent, .. } => {
382                *forward_parent = Some(self.start_pos - m.pos);
383            }
384            _ => unreachable!(),
385        }
386        self
387    }
388
389    pub(crate) fn kind(&self) -> SyntaxKind {
390        self.kind
391    }
392
393    pub(crate) fn last_token(&self, p: &Parser<'_>) -> Option<SyntaxKind> {
394        let end_pos = self.end_pos as usize;
395        debug_assert_eq!(p.events[end_pos - 1], Event::Finish);
396        p.events[..end_pos].iter().rev().find_map(|event| match event {
397            Event::Token { kind, .. } => Some(*kind),
398            _ => None,
399        })
400    }
401}