Dexp.cpp
13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
//===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a simple interactive tool which can be used to manually
// evaluate symbol search quality of Clangd index.
//
//===----------------------------------------------------------------------===//
#include "index/Serialization.h"
#include "index/dex/Dex.h"
#include "index/remote/Client.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/LineEditor/LineEditor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
#include <chrono>
namespace clang {
namespace clangd {
namespace {
// Positional argument: where the index comes from - either a path to an index
// file or "remote:<server address>".
llvm::cl::opt<std::string> IndexLocation(
llvm::cl::desc("<path to index file | remote:server.address>"),
llvm::cl::Positional);
// -c: a single command to execute non-interactively; main() exits afterwards
// instead of starting the REPL.
llvm::cl::opt<std::string>
ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
// -project-root: path to the project; main() refuses to start in remote mode
// when this is empty.
llvm::cl::opt<std::string> ProjectRoot(
"project-root",
llvm::cl::desc(
"Path to the project. Required when connecting using remote index."));
// Tool description handed to llvm::cl::ParseCommandLineOptions() in main().
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.
You can connect to remote index by passing remote:address to dexp. Example:
$ dexp remote:0.0.0.0:9000
Type use "help" request to get information about the details.
)";
/// Runs \p F once and prints how long it took to llvm::outs().
///
/// \param Name label printed in front of the measured duration.
/// \param F    the work to be timed.
void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
  // steady_clock is monotonic, so the measured interval cannot be distorted
  // by wall-clock adjustments; high_resolution_clock gives no such guarantee.
  const auto TimerStart = std::chrono::steady_clock::now();
  F();
  const auto TimerStop = std::chrono::steady_clock::now();
  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
      TimerStop - TimerStart);
  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
}
/// Returns the IDs of all symbols in \p Index whose qualified name (scope +
/// name) is exactly \p QualifiedName. A leading "::" on \p QualifiedName is
/// accepted and ignored.
std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
                                            const SymbolIndex *Index) {
  // FuzzyFind doesn't need the leading "::" qualifier for the global scope.
  QualifiedName.consume_front("::");
  const auto ScopeAndName = splitQualifiedName(QualifiedName);

  FuzzyFindRequest Request;
  // An unqualified name (e.g. "GlobalSymbol") has an empty scope part, and the
  // empty string is what the request uses for the global scope, so the scope
  // part can be forwarded unchanged in every case.
  Request.Scopes = {std::string(ScopeAndName.first)};
  Request.Query = std::string(ScopeAndName.second);

  std::vector<SymbolID> Matches;
  Index->fuzzyFind(Request,
                   [&Matches, QualifiedName](const Symbol &Candidate) {
                     // Keep only candidates whose full name matches exactly.
                     if (QualifiedName ==
                         (Candidate.Scope + Candidate.Name).str())
                       Matches.push_back(Candidate.ID);
                   });
  return Matches;
}
// REPL commands inherit from Command and contain their options as members.
// Creating a Command populates parser options, parseAndRun() resets them.
class Command {
// By resetting the parser options, we lost the standard -help flag.
llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
"help", llvm::cl::desc("Display available options"),
llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::GeneralCategory)};
// FIXME: Allow commands to signal failure.
virtual void run() = 0;
protected:
const SymbolIndex *Index;
public:
virtual ~Command() = default;
bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
const SymbolIndex &Index) {
std::string ParseErrs;
llvm::raw_string_ostream OS(ParseErrs);
bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
Overview, &OS);
// must do this before opts are destroyed
auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
if (Help.getNumOccurrences() > 0) {
// Avoid printing parse errors in this case.
// (Well, in theory. A bunch get printed to llvm::errs() regardless!)
llvm::cl::PrintHelpMessage();
return true;
}
llvm::outs() << OS.str();
if (Ok) {
this->Index = &Index;
reportTime(Argv[0], [&] { run(); });
}
return Ok;
}
};
// FIXME(kbobyrev): Ideas for more commands:
// * load/swap/reload index: this would make it possible to get rid of llvm::cl
// usages in the tool driver and actually use llvm::cl library in the REPL.
// * show posting list density histogram (or dump data somewhere so that user
// could build one)
// * show number of tokens of each kind
// * print out tokens with the most dense posting lists
// * print out tokens with least dense posting lists
class FuzzyFind : public Command {
llvm::cl::opt<std::string> Query{
"query",
llvm::cl::Positional,
llvm::cl::Required,
llvm::cl::desc("Query string to be fuzzy-matched"),
};
llvm::cl::opt<std::string> Scopes{
"scopes",
llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
};
llvm::cl::opt<unsigned> Limit{
"limit",
llvm::cl::init(10),
llvm::cl::desc("Max results to display"),
};
void run() override {
FuzzyFindRequest Request;
Request.Limit = Limit;
Request.Query = Query;
if (Scopes.getNumOccurrences() > 0) {
llvm::SmallVector<llvm::StringRef, 8> Scopes;
llvm::StringRef(this->Scopes).split(Scopes, ',');
Request.Scopes = {Scopes.begin(), Scopes.end()};
}
Request.AnyScope = Request.Scopes.empty();
// FIXME(kbobyrev): Print symbol final scores to see the distribution.
static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
"Symbol Name");
size_t Rank = 0;
Index->fuzzyFind(Request, [&](const Symbol &Sym) {
llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
Sym.Scope + Sym.Name);
});
}
};
class Lookup : public Command {
llvm::cl::opt<std::string> ID{
"id",
llvm::cl::Positional,
llvm::cl::desc("Symbol ID to look up (hex)"),
};
llvm::cl::opt<std::string> Name{
"name",
llvm::cl::desc("Qualified name to look up."),
};
void run() override {
if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
llvm::errs()
<< "Missing required argument: please provide id or -name.\n";
return;
}
std::vector<SymbolID> IDs;
if (ID.getNumOccurrences()) {
auto SID = SymbolID::fromStr(ID);
if (!SID) {
llvm::errs() << llvm::toString(SID.takeError()) << "\n";
return;
}
IDs.push_back(*SID);
} else {
IDs = getSymbolIDsFromIndex(Name, Index);
}
LookupRequest Request;
Request.IDs.insert(IDs.begin(), IDs.end());
bool FoundSymbol = false;
Index->lookup(Request, [&](const Symbol &Sym) {
FoundSymbol = true;
llvm::outs() << toYAML(Sym);
});
if (!FoundSymbol)
llvm::errs() << "not found\n";
}
};
class Refs : public Command {
llvm::cl::opt<std::string> ID{
"id",
llvm::cl::Positional,
llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
};
llvm::cl::opt<std::string> Name{
"name",
llvm::cl::desc("Qualified name of the symbol being queried."),
};
llvm::cl::opt<std::string> Filter{
"filter",
llvm::cl::init(".*"),
llvm::cl::desc(
"Print all results from files matching this regular expression."),
};
void run() override {
if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
llvm::errs()
<< "Missing required argument: please provide id or -name.\n";
return;
}
std::vector<SymbolID> IDs;
if (ID.getNumOccurrences()) {
auto SID = SymbolID::fromStr(ID);
if (!SID) {
llvm::errs() << llvm::toString(SID.takeError()) << "\n";
return;
}
IDs.push_back(*SID);
} else {
IDs = getSymbolIDsFromIndex(Name, Index);
if (IDs.size() > 1) {
llvm::errs() << llvm::formatv(
"The name {0} is ambiguous, found {1} different "
"symbols. Please use id flag to disambiguate.\n",
Name, IDs.size());
return;
}
}
RefsRequest RefRequest;
RefRequest.IDs.insert(IDs.begin(), IDs.end());
llvm::Regex RegexFilter(Filter);
Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
auto U = URI::parse(R.Location.FileURI);
if (!U) {
llvm::errs() << U.takeError();
return;
}
if (RegexFilter.match(U->body()))
llvm::outs() << R << "\n";
});
}
};
class Export : public Command {
llvm::cl::opt<IndexFileFormat> Format{
"format",
llvm::cl::desc("Format of index export"),
llvm::cl::values(
clEnumValN(IndexFileFormat::YAML, "yaml",
"human-readable YAML format"),
clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
llvm::cl::init(IndexFileFormat::YAML),
};
llvm::cl::opt<std::string> OutputFile{
"output-file",
llvm::cl::Positional,
llvm::cl::Required,
llvm::cl::desc("Output file for export"),
};
public:
void run() override {
using namespace clang::clangd;
// Read input file (as specified in global option)
auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
if (!Buffer) {
llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
return;
}
// Auto-detects input format when parsing
auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer());
if (!IndexIn) {
llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
return;
}
// Prepare output file
std::error_code EC;
llvm::raw_fd_ostream OutputStream(OutputFile, EC);
if (EC) {
llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
<< "\n";
return;
}
// Export
clang::clangd::IndexFileOut IndexOut(IndexIn.get());
IndexOut.Format = Format;
OutputStream << IndexOut;
}
};
struct {
const char *Name;
const char *Description;
std::function<std::unique_ptr<Command>()> Implementation;
} CommandInfo[] = {
{"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
{"lookup", "Dump symbol details by ID or qualified name",
std::make_unique<Lookup>},
{"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
{"export", "Export index", std::make_unique<Export>},
};
// Opens the index described by Index: "remote:<address>" creates a remote
// index client for <address> (using ProjectRoot); anything else is loaded via
// loadIndex() with UseDex enabled.
std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
  // consume_front() strips the prefix, leaving only the server address.
  if (Index.consume_front("remote:"))
    return remote::getClient(Index, ProjectRoot);
  return loadIndex(Index, /*UseDex=*/true);
}
bool runCommand(std::string Request, const SymbolIndex &Index) {
// Split on spaces and add required null-termination.
std::replace(Request.begin(), Request.end(), ' ', '\0');
llvm::SmallVector<llvm::StringRef, 8> Args;
llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
if (Args.empty())
return false;
if (Args.front() == "help") {
llvm::outs() << "dexp - Index explorer\nCommands:\n";
for (const auto &C : CommandInfo)
llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
return true;
}
llvm::SmallVector<const char *, 8> FakeArgv;
for (llvm::StringRef S : Args)
FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
for (const auto &Cmd : CommandInfo) {
if (Cmd.Name == Args.front())
return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
Index);
}
llvm::errs() << "Unknown command. Try 'help'.\n";
return false;
}
} // namespace
} // namespace clangd
} // namespace clang
int main(int argc, const char *argv[]) {
using namespace clang::clangd;
llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
bool RemoteMode = llvm::StringRef(IndexLocation).startswith("remote:");
if (RemoteMode && ProjectRoot.empty()) {
llvm::errs() << "--project-root is required in remote mode\n";
return -1;
}
std::unique_ptr<SymbolIndex> Index;
reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
[&]() { Index = openIndex(IndexLocation); });
if (!Index) {
llvm::errs() << "Failed to open the index.\n";
return -1;
}
if (!ExecCommand.empty())
return runCommand(ExecCommand, *Index) ? 0 : 1;
llvm::LineEditor LE("dexp");
while (llvm::Optional<std::string> Request = LE.readLine())
runCommand(std::move(*Request), *Index);
}