syntax/ast/
token_ext.rs

1//! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3use std::ops::Range;
4use std::{borrow::Cow, num::ParseIntError};
5
6use rustc_literal_escaper::{
7    EscapeError, MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
8    unescape_str,
9};
10use stdx::always;
11
12use crate::{
13    TextRange, TextSize,
14    ast::{self, AstToken},
15};
16
17impl ast::Comment {
18    pub fn kind(&self) -> CommentKind {
19        CommentKind::from_text(self.text())
20    }
21
22    pub fn is_doc(&self) -> bool {
23        self.kind().doc.is_some()
24    }
25
26    pub fn is_inner(&self) -> bool {
27        self.kind().doc == Some(CommentPlacement::Inner)
28    }
29
30    pub fn is_outer(&self) -> bool {
31        self.kind().doc == Some(CommentPlacement::Outer)
32    }
33
34    pub fn prefix(&self) -> &'static str {
35        let &(prefix, _kind) = CommentKind::BY_PREFIX
36            .iter()
37            .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
38            .unwrap();
39        prefix
40    }
41
42    /// Returns the textual content of a doc comment node as a single string with prefix and suffix
43    /// removed, plus the offset of the returned string from the beginning of the comment.
44    pub fn doc_comment(&self) -> Option<(&str, TextSize)> {
45        let kind = self.kind();
46        match kind {
47            CommentKind { shape, doc: Some(_) } => {
48                let prefix = kind.prefix();
49                let text = &self.text()[prefix.len()..];
50                let text = if shape == CommentShape::Block {
51                    text.strip_suffix("*/").unwrap_or(text)
52                } else {
53                    text
54                };
55                Some((text, TextSize::of(prefix)))
56            }
57            _ => None,
58        }
59    }
60}
61
/// Classification of a comment: its delimiter style and, for doc comments, its placement.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Whether the comment is a line comment or a block comment.
    pub shape: CommentShape,
    /// `Some(..)` for doc comments, `None` for ordinary comments.
    pub doc: Option<CommentPlacement>,
}

/// Delimiter style of a comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    /// A comment introduced by `//`.
    Line,
    /// A comment introduced by `/*`.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}

/// Placement of a doc comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    /// `//!` and `/*!` comments.
    Inner,
    /// `///` and `/**` comments.
    Outer,
}

impl CommentKind {
    /// Prefix table, scanned front to back by `from_text`; the order matters, because longer
    /// prefixes have to be tried before the shorter prefixes they begin with.
    const BY_PREFIX: [(&'static str, CommentKind); 9] = {
        use CommentPlacement::{Inner, Outer};
        use CommentShape::{Block, Line};
        [
            ("/**/", CommentKind { shape: Block, doc: None }),
            ("/***", CommentKind { shape: Block, doc: None }),
            ("////", CommentKind { shape: Line, doc: None }),
            ("///", CommentKind { shape: Line, doc: Some(Outer) }),
            ("//!", CommentKind { shape: Line, doc: Some(Inner) }),
            ("/**", CommentKind { shape: Block, doc: Some(Outer) }),
            ("/*!", CommentKind { shape: Block, doc: Some(Inner) }),
            ("//", CommentKind { shape: Line, doc: None }),
            ("/*", CommentKind { shape: Block, doc: None }),
        ]
    };

    /// Classifies comment `text` by the first table prefix it starts with.
    ///
    /// # Panics
    ///
    /// Panics if `text` starts with none of the prefixes in the table.
    pub(crate) fn from_text(text: &str) -> CommentKind {
        CommentKind::BY_PREFIX
            .iter()
            .find_map(|&(prefix, kind)| text.starts_with(prefix).then_some(kind))
            .unwrap()
    }

    /// Returns the prefix of the last table entry with this kind, e.g. `//` for a plain line
    /// comment and `///` for an outer line doc comment.
    ///
    /// # Panics
    ///
    /// Panics if no table entry has this kind.
    pub fn prefix(&self) -> &'static str {
        CommentKind::BY_PREFIX
            .iter()
            .rfind(|(_, kind)| kind == self)
            .map(|&(prefix, _)| prefix)
            .unwrap()
    }
}
117
118impl ast::Whitespace {
119    pub fn spans_multiple_lines(&self) -> bool {
120        let text = self.text();
121        text.find('\n').is_some_and(|idx| text[idx + 1..].contains('\n'))
122    }
123}
124
125#[derive(Debug)]
126pub struct QuoteOffsets {
127    pub quotes: (TextRange, TextRange),
128    pub contents: TextRange,
129}
130
131impl QuoteOffsets {
132    fn new(literal: &str) -> Option<QuoteOffsets> {
133        let left_quote = literal.find('"')?;
134        let right_quote = literal.rfind('"')?;
135        if left_quote == right_quote {
136            // `literal` only contains one quote
137            return None;
138        }
139
140        let start = TextSize::from(0);
141        let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
142        let right_quote = TextSize::try_from(right_quote).unwrap();
143        let end = TextSize::of(literal);
144
145        let res = QuoteOffsets {
146            quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
147            contents: TextRange::new(left_quote, right_quote),
148        };
149        Some(res)
150    }
151}
152
153pub trait IsString: AstToken {
154    fn raw_prefix(&self) -> &'static str;
155    fn unescape(&self, s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>));
156    fn is_raw(&self) -> bool {
157        self.text().starts_with(self.raw_prefix())
158    }
159    fn quote_offsets(&self) -> Option<QuoteOffsets> {
160        let text = self.text();
161        let offsets = QuoteOffsets::new(text)?;
162        let o = self.syntax().text_range().start();
163        let offsets = QuoteOffsets {
164            quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
165            contents: offsets.contents + o,
166        };
167        Some(offsets)
168    }
169    fn text_range_between_quotes(&self) -> Option<TextRange> {
170        self.quote_offsets().map(|it| it.contents)
171    }
172    fn text_without_quotes(&self) -> &str {
173        let text = self.text();
174        let Some(offsets) = self.text_range_between_quotes() else { return text };
175        &text[offsets - self.syntax().text_range().start()]
176    }
177    fn open_quote_text_range(&self) -> Option<TextRange> {
178        self.quote_offsets().map(|it| it.quotes.0)
179    }
180    fn close_quote_text_range(&self) -> Option<TextRange> {
181        self.quote_offsets().map(|it| it.quotes.1)
182    }
183    fn escaped_char_ranges(&self, cb: &mut dyn FnMut(TextRange, Result<char, EscapeError>)) {
184        let Some(text_range_no_quotes) = self.text_range_between_quotes() else { return };
185
186        let start = self.syntax().text_range().start();
187        let text = &self.text()[text_range_no_quotes - start];
188        let offset = text_range_no_quotes.start() - start;
189
190        self.unescape(text, &mut |range: Range<usize>, unescaped_char| {
191            if let Some((s, e)) = range.start.try_into().ok().zip(range.end.try_into().ok()) {
192                cb(TextRange::new(s, e) + offset, unescaped_char);
193            }
194        });
195    }
196    fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
197        let contents_range = self.text_range_between_quotes()?;
198        if always!(TextRange::up_to(contents_range.len()).contains_range(range)) {
199            Some(range + contents_range.start())
200        } else {
201            None
202        }
203    }
204    fn map_offset_down(&self, offset: TextSize) -> Option<TextSize> {
205        let contents_range = self.text_range_between_quotes()?;
206        offset.checked_sub(contents_range.start())
207    }
208}
209
210impl IsString for ast::String {
211    fn raw_prefix(&self) -> &'static str {
212        "r"
213    }
214    fn unescape(&self, s: &str, cb: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
215        unescape_str(s, cb)
216    }
217}
218
219impl ast::String {
220    pub fn value(&self) -> Result<Cow<'_, str>, EscapeError> {
221        let text = self.text();
222        let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
223        let text = &text[text_range - self.syntax().text_range().start()];
224        if self.is_raw() {
225            return Ok(Cow::Borrowed(text));
226        }
227
228        let mut buf = String::new();
229        let mut prev_end = 0;
230        let mut has_error = None;
231        unescape_str(text, |char_range, unescaped_char| {
232            match (unescaped_char, buf.capacity() == 0) {
233                (Ok(c), false) => buf.push(c),
234                (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
235                    prev_end = char_range.end
236                }
237                (Ok(c), true) => {
238                    buf.reserve_exact(text.len());
239                    buf.push_str(&text[..prev_end]);
240                    buf.push(c);
241                }
242                (Err(e), _) => has_error = Some(e),
243            }
244        });
245
246        match (has_error, buf.capacity() == 0) {
247            (Some(e), _) => Err(e),
248            (None, true) => Ok(Cow::Borrowed(text)),
249            (None, false) => Ok(Cow::Owned(buf)),
250        }
251    }
252}
253
254impl IsString for ast::ByteString {
255    fn raw_prefix(&self) -> &'static str {
256        "br"
257    }
258    fn unescape(&self, s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
259        unescape_byte_str(s, |range, res| callback(range, res.map(char::from)))
260    }
261}
262
263impl ast::ByteString {
264    pub fn value(&self) -> Result<Cow<'_, [u8]>, EscapeError> {
265        let text = self.text();
266        let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
267        let text = &text[text_range - self.syntax().text_range().start()];
268        if self.is_raw() {
269            return Ok(Cow::Borrowed(text.as_bytes()));
270        }
271
272        let mut buf: Vec<u8> = Vec::new();
273        let mut prev_end = 0;
274        let mut has_error = None;
275        unescape_byte_str(text, |char_range, unescaped_byte| {
276            match (unescaped_byte, buf.capacity() == 0) {
277                (Ok(b), false) => buf.push(b),
278                (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
279                    prev_end = char_range.end
280                }
281                (Ok(b), true) => {
282                    buf.reserve_exact(text.len());
283                    buf.extend_from_slice(&text.as_bytes()[..prev_end]);
284                    buf.push(b);
285                }
286                (Err(e), _) => has_error = Some(e),
287            }
288        });
289
290        match (has_error, buf.capacity() == 0) {
291            (Some(e), _) => Err(e),
292            (None, true) => Ok(Cow::Borrowed(text.as_bytes())),
293            (None, false) => Ok(Cow::Owned(buf)),
294        }
295    }
296}
297
298impl IsString for ast::CString {
299    fn raw_prefix(&self) -> &'static str {
300        "cr"
301    }
302    // NOTE: This method should only be used for highlighting ranges. The unescaped
303    // char/byte is not used. For simplicity, we return an arbitrary placeholder char.
304    fn unescape(&self, s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
305        unescape_c_str(s, |range, _res| callback(range, Ok('_')))
306    }
307}
308
309impl ast::CString {
310    pub fn value(&self) -> Result<Cow<'_, [u8]>, EscapeError> {
311        let text = self.text();
312        let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
313        let text = &text[text_range - self.syntax().text_range().start()];
314        if self.is_raw() {
315            return Ok(Cow::Borrowed(text.as_bytes()));
316        }
317
318        let mut buf = Vec::new();
319        let mut prev_end = 0;
320        let mut has_error = None;
321        let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
322            MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
323            MixedUnit::HighByte(b) => buf.push(b),
324        };
325        unescape_c_str(text, |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
326            (Ok(u), false) => extend_unit(&mut buf, u),
327            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
328                prev_end = char_range.end
329            }
330            (Ok(u), true) => {
331                buf.reserve_exact(text.len());
332                buf.extend(&text.as_bytes()[..prev_end]);
333                extend_unit(&mut buf, u);
334            }
335            (Err(e), _) => has_error = Some(e),
336        });
337
338        match (has_error, buf.capacity() == 0) {
339            (Some(e), _) => Err(e),
340            (None, true) => Ok(Cow::Borrowed(text.as_bytes())),
341            (None, false) => Ok(Cow::Owned(buf)),
342        }
343    }
344}
345
346impl ast::IntNumber {
347    pub fn radix(&self) -> Radix {
348        match self.text().get(..2).unwrap_or_default() {
349            "0b" => Radix::Binary,
350            "0o" => Radix::Octal,
351            "0x" => Radix::Hexadecimal,
352            _ => Radix::Decimal,
353        }
354    }
355
356    pub fn split_into_parts(&self) -> (&str, &str, &str) {
357        let radix = self.radix();
358        let (prefix, mut text) = self.text().split_at(radix.prefix_len());
359
360        let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
361            Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
362            _ => |(_, c)| c.is_ascii_alphabetic(),
363        };
364
365        let mut suffix = "";
366        if let Some((suffix_start, _)) = text.char_indices().find(is_suffix_start) {
367            let (text2, suffix2) = text.split_at(suffix_start);
368            text = text2;
369            suffix = suffix2;
370        };
371
372        (prefix, text, suffix)
373    }
374
375    pub fn value(&self) -> Result<u128, ParseIntError> {
376        let (_, text, _) = self.split_into_parts();
377        u128::from_str_radix(&text.replace('_', ""), self.radix() as u32)
378    }
379
380    pub fn suffix(&self) -> Option<&str> {
381        let (_, _, suffix) = self.split_into_parts();
382        if suffix.is_empty() { None } else { Some(suffix) }
383    }
384
385    pub fn value_string(&self) -> String {
386        let (_, text, _) = self.split_into_parts();
387        text.replace('_', "")
388    }
389}
390
391impl ast::FloatNumber {
392    pub fn split_into_parts(&self) -> (&str, &str) {
393        let text = self.text();
394        let mut float_text = self.text();
395        let mut suffix = "";
396        let mut indices = text.char_indices();
397        if let Some((mut suffix_start, c)) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())
398        {
399            if c == 'e' || c == 'E' {
400                if let Some(suffix_start_tuple) = indices.find(|(_, c)| c.is_ascii_alphabetic()) {
401                    suffix_start = suffix_start_tuple.0;
402
403                    float_text = &text[..suffix_start];
404                    suffix = &text[suffix_start..];
405                }
406            } else {
407                float_text = &text[..suffix_start];
408                suffix = &text[suffix_start..];
409            }
410        }
411
412        (float_text, suffix)
413    }
414
415    pub fn suffix(&self) -> Option<&str> {
416        let (_, suffix) = self.split_into_parts();
417        if suffix.is_empty() { None } else { Some(suffix) }
418    }
419
420    pub fn value_string(&self) -> String {
421        let (text, _) = self.split_into_parts();
422        text.replace('_', "")
423    }
424}
425
/// Radix of an integer literal; the discriminant is the numeric base.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in ascending order of base.
    pub const ALL: &'static [Radix] =
        &[Self::Binary, Self::Octal, Self::Decimal, Self::Hexadecimal];

    /// Length in bytes of the literal prefix for this radix: 0 for decimal, 2 otherwise.
    const fn prefix_len(self) -> usize {
        match self {
            Self::Binary | Self::Octal | Self::Hexadecimal => 2,
            Self::Decimal => 0,
        }
    }
}
445
446impl ast::Char {
447    pub fn value(&self) -> Result<char, EscapeError> {
448        let mut text = self.text();
449        if text.starts_with('\'') {
450            text = &text[1..];
451        } else {
452            return Err(EscapeError::ZeroChars);
453        }
454        if text.ends_with('\'') {
455            text = &text[0..text.len() - 1];
456        }
457
458        unescape_char(text)
459    }
460}
461
462impl ast::Byte {
463    pub fn value(&self) -> Result<u8, EscapeError> {
464        let mut text = self.text();
465        if text.starts_with("b\'") {
466            text = &text[2..];
467        } else {
468            return Err(EscapeError::ZeroChars);
469        }
470        if text.ends_with('\'') {
471            text = &text[0..text.len() - 1];
472        }
473
474        unescape_byte(text)
475    }
476}
477
478pub enum AnyString {
479    ByteString(ast::ByteString),
480    CString(ast::CString),
481    String(ast::String),
482}
483
484impl AnyString {
485    pub fn value(&self) -> Result<Cow<'_, str>, EscapeError> {
486        fn from_utf8(s: Cow<'_, [u8]>) -> Result<Cow<'_, str>, EscapeError> {
487            match s {
488                Cow::Borrowed(s) => str::from_utf8(s)
489                    .map_err(|_| EscapeError::NonAsciiCharInByte)
490                    .map(Cow::Borrowed),
491                Cow::Owned(s) => String::from_utf8(s)
492                    .map_err(|_| EscapeError::NonAsciiCharInByte)
493                    .map(Cow::Owned),
494            }
495        }
496
497        match self {
498            AnyString::String(s) => s.value(),
499            AnyString::ByteString(s) => s.value().and_then(from_utf8),
500            AnyString::CString(s) => s.value().and_then(from_utf8),
501        }
502    }
503}
504
505impl ast::AstToken for AnyString {
506    fn can_cast(kind: crate::SyntaxKind) -> bool {
507        ast::String::can_cast(kind)
508            || ast::ByteString::can_cast(kind)
509            || ast::CString::can_cast(kind)
510    }
511
512    fn cast(syntax: crate::SyntaxToken) -> Option<Self> {
513        ast::String::cast(syntax.clone())
514            .map(Self::String)
515            .or_else(|| ast::ByteString::cast(syntax.clone()).map(Self::ByteString))
516            .or_else(|| ast::CString::cast(syntax).map(Self::CString))
517    }
518
519    fn syntax(&self) -> &crate::SyntaxToken {
520        match self {
521            Self::ByteString(it) => it.syntax(),
522            Self::CString(it) => it.syntax(),
523            Self::String(it) => it.syntax(),
524        }
525    }
526}
527
528impl IsString for AnyString {
529    fn raw_prefix(&self) -> &'static str {
530        match self {
531            AnyString::ByteString(s) => s.raw_prefix(),
532            AnyString::CString(s) => s.raw_prefix(),
533            AnyString::String(s) => s.raw_prefix(),
534        }
535    }
536
537    fn unescape(&self, s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
538        match self {
539            AnyString::ByteString(it) => it.unescape(s, callback),
540            AnyString::CString(it) => it.unescape(s, callback),
541            AnyString::String(it) => it.unescape(s, callback),
542        }
543    }
544}
545
#[cfg(test)]
mod tests {
    use rustc_apfloat::ieee::Quad as f128;

    use crate::ast::{self, FloatNumber, IntNumber, make};

    /// Asserts that `lit`, wrapped as a float literal token, has the `expected` suffix.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = FloatNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that `lit`, wrapped as an integer literal token, has the `expected` suffix.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.suffix(), expected.into());
    }

    /// Asserts that the value string of `lit` parses to the same float as `expected`, both
    /// when wrapped as a float token and when wrapped as an integer token.
    // FIXME(#17451) Use `expected: f128` once `f128` is stabilised.
    fn check_float_value(lit: &str, expected: &str) {
        let expected = Some(expected.parse::<f128>().unwrap());

        let via_float = FloatNumber { syntax: make::tokens::literal(lit) }.value_string();
        assert_eq!(via_float.parse::<f128>().ok(), expected);

        let via_int = IntNumber { syntax: make::tokens::literal(lit) }.value_string();
        assert_eq!(via_int.parse::<f128>().ok(), expected);
    }

    /// Asserts that `lit`, wrapped as an integer literal token, has the `expected` value
    /// (`None` meaning that parsing fails).
    fn check_int_value(lit: &str, expected: impl Into<Option<u128>>) {
        let token = IntNumber { syntax: make::tokens::literal(lit) };
        assert_eq!(token.value().ok(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        let cases: [(&str, Option<&str>); 7] = [
            ("123.0", None),
            ("123f32", Some("f32")),
            ("123.0e", None),
            ("123.0e4", None),
            ("123.0ef16", Some("f16")),
            ("123.0E4f32", Some("f32")),
            ("1_2_3.0_f128", Some("f128")),
        ];
        for (lit, expected) in cases {
            check_float_suffix(lit, expected);
        }
    }

    #[test]
    fn test_int_number_suffix() {
        let cases: [(&str, Option<&str>); 9] = [
            ("123", None),
            ("123i32", Some("i32")),
            ("1_0_1_l_o_l", Some("l_o_l")),
            ("0b11", None),
            ("0o11", None),
            ("0xff", None),
            ("0b11u32", Some("u32")),
            ("0o11u32", Some("u32")),
            ("0xffu32", Some("u32")),
        ];
        for (lit, expected) in cases {
            check_int_suffix(lit, expected);
        }
    }

    /// Wraps `lit` in double quotes and asserts the unescaped string value (`None` meaning
    /// that unescaping fails).
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        let token = ast::String { syntax: make::tokens::literal(&format!("\"{lit}\"")) };
        assert_eq!(token.value().as_deref().ok(), expected.into());
    }

    #[test]
    fn test_string_escape() {
        check_string_value(r"foobar", "foobar");
        check_string_value(r"\foobar", None);
        check_string_value(r"\nfoobar", "\nfoobar");
        check_string_value(r"C:\\Windows\\System32\\", "C:\\Windows\\System32\\");
        check_string_value(r"\x61bcde", "abcde");
        check_string_value(
            r"a\
bcde", "abcde",
        );
    }

    /// Wraps `lit` in `b"..."` and asserts the unescaped bytes (`None` meaning that
    /// unescaping fails).
    fn check_byte_string_value<'a, const N: usize>(
        lit: &str,
        expected: impl Into<Option<&'a [u8; N]>>,
    ) {
        let token = ast::ByteString { syntax: make::tokens::literal(&format!("b\"{lit}\"")) };
        assert_eq!(token.value().as_deref().ok(), expected.into().map(|value| &value[..]));
    }

    #[test]
    fn test_byte_string_escape() {
        check_byte_string_value(r"foobar", b"foobar");
        check_byte_string_value(r"\foobar", None::<&[u8; 0]>);
        check_byte_string_value(r"\nfoobar", b"\nfoobar");
        check_byte_string_value(r"C:\\Windows\\System32\\", b"C:\\Windows\\System32\\");
        check_byte_string_value(r"\x61bcde", b"abcde");
        check_byte_string_value(
            r"a\
bcde", b"abcde",
        );
    }

    #[test]
    fn test_value_underscores() {
        check_float_value("1.3_4665449586950493453___6_f128", "1.346654495869504934536");
        check_float_value("1.234567891011121_f64", "1.234567891011121");
        check_float_value("1__0.__0__f32", "10.0");
        check_float_value("3._0_f16", "3.0");
        check_int_value("0b__1_0_", 2);
        check_int_value("1_1_1_1_1_1", 111111);
    }
}
659}