FileIndex.h
7.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
//===--- FileIndex.h - Index for files. ---------------------------- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// FileIndex implements SymbolIndex for symbols from a set of files. Symbols are
// maintained at source-file granularity (e.g. with ASTs), and files can be
// updated dynamically.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
#include "Headers.h"
#include "Index.h"
#include "MemIndex.h"
#include "Merge.h"
#include "index/CanonicalIncludes.h"
#include "index/Ref.h"
#include "index/Relation.h"
#include "index/Serialization.h"
#include "index/Symbol.h"
#include "support/Path.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <memory>
#include <vector>
namespace clang {
class ASTContext;
namespace clangd {
class ParsedAST;
/// Select between in-memory index implementations, which have tradeoffs.
enum class IndexType {
// MemIndex is trivially cheap to build, but has poor query performance.
Light,
// Dex is relatively expensive to build and uses more memory, but is fast.
Heavy,
};
/// How to handle duplicated symbols across multiple files.
enum class DuplicateHandling {
// Pick a random symbol. Less accurate but faster.
PickOne,
// Merge symbols. More accurate but slower.
Merge,
};
/// A container of slabs associated with a key. It can be updated at key
/// granularity, replacing all slabs belonging to a key with a new set. Keys are
/// usually file paths/uris.
///
/// This implements snapshot semantics. Each update will create a new snapshot
/// for all slabs of the Key. Snapshots are managed with shared pointers that
/// are shared between this class and the users. For each key, this class only
/// stores a pointer pointing to the newest snapshot, and an outdated snapshot
/// is deleted by the last owner of the snapshot, either this class or the
/// symbol index.
///
/// The snapshot semantics keeps critical sections minimal since we only need
/// locking when we swap or obtain references to snapshots.
class FileSymbols {
public:
/// Updates all slabs associated with the \p Key.
/// If either is nullptr, corresponding data for \p Key will be removed.
/// If CountReferences is true, \p Refs will be used for counting references
/// during merging.
void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols,
std::unique_ptr<RefSlab> Refs,
std::unique_ptr<RelationSlab> Relations, bool CountReferences);
/// The index keeps the slabs alive.
/// Will count Symbol::References based on number of references in the main
/// files, while building the index with DuplicateHandling::Merge option.
/// Version is populated with an increasing sequence counter.
std::unique_ptr<SymbolIndex>
buildIndex(IndexType,
DuplicateHandling DuplicateHandle = DuplicateHandling::PickOne,
size_t *Version = nullptr);
private:
struct RefSlabAndCountReferences {
std::shared_ptr<RefSlab> Slab;
bool CountReferences = false;
};
mutable std::mutex Mutex;
size_t Version = 0;
llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot;
llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot;
llvm::StringMap<std::shared_ptr<RelationSlab>> RelationsSnapshot;
};
/// This manages symbols from files and an in-memory index on all symbols.
/// FIXME: Expose an interface to remove files that are closed.
class FileIndex : public MergedIndex {
public:
FileIndex(bool UseDex = true, bool CollectMainFileRefs = false);
/// Update preamble symbols of file \p Path with all declarations in \p AST
/// and macros in \p PP.
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
std::shared_ptr<Preprocessor> PP,
const CanonicalIncludes &Includes);
/// Update symbols and references from main file \p Path with
/// `indexMainDecls`.
void updateMain(PathRef Path, ParsedAST &AST);
private:
bool UseDex; // FIXME: this should be always on.
bool CollectMainFileRefs;
// Contains information from each file's preamble only. Symbols and relations
// are sharded per declaration file to deduplicate multiple symbols and reduce
// memory usage.
// Missing information:
// - symbol refs (these are always "from the main file")
// - definition locations in the main file
//
// Note that we store only one version of a header, hence symbols appearing in
// different PP states will be missing.
FileSymbols PreambleSymbols;
SwapIndex PreambleIndex;
// Contains information from each file's main AST.
// These are updated frequently (on file change), but are relatively small.
// Mostly contains:
// - refs to symbols declared in the preamble and referenced from main
// - symbols declared both in the main file and the preamble
// (Note that symbols *only* in the main file are not indexed).
FileSymbols MainFileSymbols;
SwapIndex MainFileIndex;
// While both the FileIndex and SwapIndex are threadsafe, we need to track
// versions to ensure that we don't overwrite newer indexes with older ones.
std::mutex UpdateIndexMu;
unsigned MainIndexVersion = 0;
unsigned PreambleIndexVersion = 0;
};
using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>;
/// Retrieves symbols and refs of local top level decls in \p AST (i.e.
/// `AST.getLocalTopLevelDecls()`).
/// Exposed to assist in unit tests.
SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs = false);
/// Index declarations from \p AST and macros from \p PP that are declared in
/// included headers.
SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
std::shared_ptr<Preprocessor> PP,
const CanonicalIncludes &Includes);
/// Takes slabs coming from a TU (multiple files) and shards them per
/// declaration location.
struct FileShardedIndex {
/// \p HintPath is used to convert file URIs stored in symbols into absolute
/// paths.
explicit FileShardedIndex(IndexFileIn Input);
/// Returns uris for all files that has a shard.
std::vector<llvm::StringRef> getAllSources() const;
/// Generates index shard for the \p Uri. Note that this function results in
/// a copy of all the relevant data.
/// Returned index will always have Symbol/Refs/Relation Slabs set, even if
/// they are empty.
llvm::Optional<IndexFileIn> getShard(llvm::StringRef Uri) const;
private:
// Contains all the information that belongs to a single file.
struct FileShard {
// Either declared or defined in the file.
llvm::DenseSet<const Symbol *> Symbols;
// Reference occurs in the file.
llvm::DenseSet<const Ref *> Refs;
// Subject is declared in the file.
llvm::DenseSet<const Relation *> Relations;
// Contains edges for only the direct includes.
IncludeGraph IG;
};
// Keeps all the information alive.
const IndexFileIn Index;
// Mapping from URIs to slab information.
llvm::StringMap<FileShard> Shards;
// Used to build RefSlabs.
llvm::DenseMap<const Ref *, SymbolID> RefToSymID;
};
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H