vfs/
file_set.rs

1//! Partitions a list of files into disjoint subsets.
2//!
3//! Files which do not belong to any explicitly configured `FileSet` belong to
4//! the default `FileSet`.
5use std::fmt;
6
7use fst::{IntoStreamer, Streamer};
8use indexmap::IndexMap;
9use rustc_hash::{FxBuildHasher, FxHashMap};
10
11use crate::{AnchoredPath, FileId, Vfs, VfsPath};
12
13/// A set of [`VfsPath`]s identified by [`FileId`]s.
14#[derive(Default, Clone, Eq, PartialEq)]
15pub struct FileSet {
16    files: FxHashMap<VfsPath, FileId>,
17    paths: IndexMap<FileId, VfsPath, FxBuildHasher>,
18}
19
20impl FileSet {
21    /// Returns the number of stored paths.
22    pub fn len(&self) -> usize {
23        self.files.len()
24    }
25
26    /// Get the id of the file corresponding to `path`.
27    ///
28    /// If either `path`'s [`anchor`](AnchoredPath::anchor) or the resolved path is not in
29    /// the set, returns [`None`].
30    pub fn resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId> {
31        let mut base = self.paths[&path.anchor].clone();
32        base.pop();
33        let path = base.join(path.path)?;
34        self.files.get(&path).copied()
35    }
36
37    /// Get the id corresponding to `path` if it exists in the set.
38    pub fn file_for_path(&self, path: &VfsPath) -> Option<&FileId> {
39        self.files.get(path)
40    }
41
42    /// Get the path corresponding to `file` if it exists in the set.
43    pub fn path_for_file(&self, file: &FileId) -> Option<&VfsPath> {
44        self.paths.get(file)
45    }
46
47    /// Insert the `file_id, path` pair into the set.
48    ///
49    /// # Note
50    /// Multiple [`FileId`] can be mapped to the same [`VfsPath`], and vice-versa.
51    pub fn insert(&mut self, file_id: FileId, path: VfsPath) {
52        self.files.insert(path.clone(), file_id);
53        self.paths.insert(file_id, path);
54    }
55
56    /// Iterate over this set's ids.
57    pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ {
58        self.paths.keys().copied()
59    }
60}
61
62impl fmt::Debug for FileSet {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        f.debug_struct("FileSet").field("n_files", &self.files.len()).finish()
65    }
66}
67
68/// This contains path prefixes to partition a [`Vfs`] into [`FileSet`]s.
69///
70/// # Example
71/// ```rust
72/// # use vfs::{file_set::FileSetConfigBuilder, VfsPath, Vfs};
73/// let mut builder = FileSetConfigBuilder::default();
74/// builder.add_file_set(vec![VfsPath::new_virtual_path("/src".to_string())]);
75/// let config = builder.build();
76/// let mut file_system = Vfs::default();
77/// file_system.set_file_contents(VfsPath::new_virtual_path("/src/main.rs".to_string()), Some(vec![]));
78/// file_system.set_file_contents(VfsPath::new_virtual_path("/src/lib.rs".to_string()), Some(vec![]));
79/// file_system.set_file_contents(VfsPath::new_virtual_path("/build.rs".to_string()), Some(vec![]));
80/// // contains the sets :
81/// // { "/src/main.rs", "/src/lib.rs" }
82/// // { "build.rs" }
83/// let sets = config.partition(&file_system);
84/// ```
85#[derive(Debug)]
86pub struct FileSetConfig {
87    /// Number of sets that `self` can partition a [`Vfs`] into.
88    ///
89    /// This should be the number of sets in `self.map` + 1 for files that don't fit in any
90    /// defined set.
91    n_file_sets: usize,
92    /// Map from encoded paths to the set they belong to.
93    map: fst::Map<Vec<u8>>,
94}
95
96impl Default for FileSetConfig {
97    fn default() -> Self {
98        FileSetConfig::builder().build()
99    }
100}
101
102impl FileSetConfig {
103    /// Returns a builder for `FileSetConfig`.
104    pub fn builder() -> FileSetConfigBuilder {
105        FileSetConfigBuilder::default()
106    }
107
108    /// Partition `vfs` into `FileSet`s.
109    ///
110    /// Creates a new [`FileSet`] for every set of prefixes in `self`.
111    pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> {
112        let mut scratch_space = Vec::new();
113        let mut res = vec![FileSet::default(); self.len()];
114        for (file_id, path) in vfs.iter() {
115            let root = self.classify(path, &mut scratch_space);
116            res[root].insert(file_id, path.clone());
117        }
118        res
119    }
120
121    /// Number of sets that `self` can partition a [`Vfs`] into.
122    fn len(&self) -> usize {
123        self.n_file_sets
124    }
125
126    /// Get the lexicographically ordered vector of the underlying map.
127    pub fn roots(&self) -> Vec<(Vec<u8>, u64)> {
128        self.map.stream().into_byte_vec()
129    }
130
131    /// Returns the set index for the given `path`.
132    ///
133    /// `scratch_space` is used as a buffer and will be entirely replaced.
134    fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize {
135        // `path` is a file, but r-a only cares about the containing directory. We don't
136        // want `/foo/bar_baz.rs` to be attributed to source root directory `/foo/bar`.
137        let path = path.parent().unwrap_or_else(|| path.clone());
138
139        scratch_space.clear();
140        path.encode(scratch_space);
141        let automaton = PrefixOf::new(scratch_space.as_slice());
142        let mut longest_prefix = self.len() - 1;
143        let mut stream = self.map.search(automaton).into_stream();
144        while let Some((_, v)) = stream.next() {
145            longest_prefix = v as usize;
146        }
147        longest_prefix
148    }
149}
150
151/// Builder for [`FileSetConfig`].
152#[derive(Default)]
153pub struct FileSetConfigBuilder {
154    roots: Vec<Vec<VfsPath>>,
155}
156
157impl FileSetConfigBuilder {
158    /// Returns the number of sets currently held.
159    pub fn len(&self) -> usize {
160        self.roots.len()
161    }
162
163    /// Add a new set of paths prefixes.
164    pub fn add_file_set(&mut self, roots: Vec<VfsPath>) {
165        self.roots.push(roots);
166    }
167
168    /// Build the `FileSetConfig`.
169    pub fn build(self) -> FileSetConfig {
170        let n_file_sets = self.roots.len() + 1;
171        let map = {
172            let mut entries = Vec::new();
173            for (i, paths) in self.roots.into_iter().enumerate() {
174                for p in paths {
175                    let mut buf = Vec::new();
176                    p.encode(&mut buf);
177                    entries.push((buf, i as u64));
178                }
179            }
180            entries.sort();
181            entries.dedup_by(|(a, _), (b, _)| a == b);
182            fst::Map::from_iter(entries).unwrap()
183        };
184        FileSetConfig { n_file_sets, map }
185    }
186}
187
188/// Implements [`fst::Automaton`]
189///
190/// It will match if `prefix_of` is a prefix of the given data.
191struct PrefixOf<'a> {
192    prefix_of: &'a [u8],
193}
194
195impl<'a> PrefixOf<'a> {
196    /// Creates a new `PrefixOf` from the given slice.
197    fn new(prefix_of: &'a [u8]) -> Self {
198        Self { prefix_of }
199    }
200}
201
202impl fst::Automaton for PrefixOf<'_> {
203    type State = usize;
204    fn start(&self) -> usize {
205        0
206    }
207    fn is_match(&self, &state: &usize) -> bool {
208        state != !0
209    }
210    fn can_match(&self, &state: &usize) -> bool {
211        state != !0
212    }
213    fn accept(&self, &state: &usize, byte: u8) -> usize {
214        if self.prefix_of.get(state) == Some(&byte) { state + 1 } else { !0 }
215    }
216}
217
218#[cfg(test)]
219mod tests;