ThinLtoModuleIndex.cpp
8.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
#include "ThinLtoModuleIndex.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <string>
#define DEBUG_TYPE "thinltojit"
namespace llvm {
namespace orc {
// Read the summary index of the bitcode file at InputPath and merge it into
// the combined index under the next free module ID.
Error ThinLtoModuleIndex::add(StringRef InputPath) {
  auto Buf = errorOrToExpected(MemoryBuffer::getFile(InputPath));
  if (!Buf)
    return Buf.takeError();

  if (Error ParseErr = readModuleSummaryIndex((*Buf)->getMemBufferRef(),
                                              CombinedSummaryIndex,
                                              NextModuleId))
    return ParseErr;

#ifndef NDEBUG
  // Sanity check in debug builds: no module path may occur twice. Sorting and
  // deduplicating must not shrink the list.
  auto AllPaths = getAllModulePaths();
  unsigned NumPaths = AllPaths.size();
  std::sort(AllPaths.begin(), AllPaths.end());
  AllPaths.erase(std::unique(AllPaths.begin(), AllPaths.end()),
                 AllPaths.end());
  assert(NumPaths == AllPaths.size() && "Module paths must be unique");
#endif

  ++NextModuleId;
  return Error::success();
}
// Return the paths of all modules in the combined index, each positioned at
// the slot of its module ID (IDs are dense and unique, so there are no gaps).
std::vector<StringRef> ThinLtoModuleIndex::getAllModulePaths() const {
  // Bind the module table by reference. Copying it (plain `auto`) would make
  // the returned StringRefs point into a temporary StringMap that dies when
  // this function returns.
  const auto &ModuleTable = CombinedSummaryIndex.modulePaths();
  std::vector<StringRef> Paths;
  Paths.resize(ModuleTable.size());
  for (const auto &KV : ModuleTable) {
    // KV.second.first is the numeric module ID assigned on add().
    assert(Paths[KV.second.first].empty() && "IDs are unique and continuous");
    Paths[KV.second.first] = KV.first();
  }
  return Paths;
}
// Look up the single global-value summary for the given GUID, or nullptr if
// the combined index doesn't know the symbol.
GlobalValueSummary *
ThinLtoModuleIndex::getSummary(GlobalValue::GUID Function) const {
  ValueInfo VI = CombinedSummaryIndex.getValueInfo(Function);
  if (!VI)
    return nullptr;
  const auto &Summaries = VI.getSummaryList();
  if (Summaries.empty())
    return nullptr;

  // There can be more than one symbol with the same GUID, in the case of same-
  // named locals in different but same-named source files that were compiled
  // in their respective directories (so the source file name and resulting
  // GUID is the same). We avoid this by checking that module paths are unique
  // upon add().
  //
  // TODO: We can still get duplicates on symbols declared with
  // attribute((weak)), a GNU extension supported by gcc and clang.
  // We should support it by looking for a symbol in the current module
  // or in the same module as the caller.
  assert(Summaries.size() == 1 && "Weak symbols not yet supported");

  return Summaries.front()->getBaseObject();
}
// Map a symbol name to the path of the module that defines it, or None if the
// combined index has no summary for the symbol.
Optional<StringRef>
ThinLtoModuleIndex::getModulePathForSymbol(StringRef Name) const {
  GlobalValue::GUID Guid = GlobalValue::getGUID(Name);
  GlobalValueSummary *Summary = getSummary(Guid);
  if (Summary == nullptr)
    return None; // We don't know the symbol.
  return Summary->modulePath();
}
// Kick off asynchronous parsing of the bitcode module at Path on the
// ParseModuleWorkers thread pool, unless it has been scheduled before.
// "Prelocked" means the caller must already hold ScheduledModulesLock.
void ThinLtoModuleIndex::scheduleModuleParsingPrelocked(StringRef Path) {
  // Once the module was scheduled, we can call takeModule().
  auto ScheduledIt = ScheduledModules.find(Path);
  if (ScheduledIt != ScheduledModules.end())
    return;
  // The worker takes its own std::string copy of the path (see the async()
  // call below), since Path's backing storage may not outlive the task.
  auto Worker = [this](std::string Path) {
    if (auto TSM = doParseModule(Path)) {
      // Publish the parsed module for takeModule() under the parsed-map lock.
      std::lock_guard<std::mutex> Lock(ParsedModulesLock);
      ParsedModules[Path] = std::move(*TSM);
      LLVM_DEBUG(dbgs() << "Finished parsing module: " << Path << "\n");
    } else {
      // NOTE(review): on parse failure no ParsedModules entry is created, yet
      // the path stays in ScheduledModules; takeModule() for this path would
      // then hit its "Must be ready now" assert — confirm callers never
      // request modules whose parse failed.
      ES.reportError(TSM.takeError());
    }
  };
  LLVM_DEBUG(dbgs() << "Schedule module for parsing: " << Path << "\n");
  // Store the future so takeModule() can block until the worker finishes.
  ScheduledModules[Path] = ParseModuleWorkers.async(Worker, Path.str());
}
// Hand out the parsed module for Path, blocking until the parse worker
// scheduled via scheduleModuleParsingPrelocked() has published it. Each
// module is handed out at most once: the map slot is reset to an empty
// ThreadSafeModule afterwards.
ThreadSafeModule ThinLtoModuleIndex::takeModule(StringRef Path) {
  std::unique_lock<std::mutex> ParseLock(ParsedModulesLock);
  auto ParsedIt = ParsedModules.find(Path);
  if (ParsedIt == ParsedModules.end()) {
    // Drop the parsed-map lock before touching the schedule map, so the two
    // locks are never held at the same time.
    ParseLock.unlock();
    // The module is not ready, wait for the future we stored.
    std::unique_lock<std::mutex> ScheduleLock(ScheduledModulesLock);
    auto ScheduledIt = ScheduledModules.find(Path);
    assert(ScheduledIt != ScheduledModules.end() &&
           "Don't call for unscheduled modules");
    // Copy the shared_future so we can release the lock before blocking.
    std::shared_future<void> Future = ScheduledIt->getValue();
    ScheduleLock.unlock();
    Future.get();
    // The worker has finished; the module must be in the parsed map now.
    ParseLock.lock();
    ParsedIt = ParsedModules.find(Path);
    assert(ParsedIt != ParsedModules.end() && "Must be ready now");
  }
  // We only add each module once. If it's not here anymore, we can skip it.
  ThreadSafeModule TSM = std::move(ParsedIt->getValue());
  ParsedIt->getValue() = ThreadSafeModule();
  return TSM;
}
// Synchronous convenience wrapper: make sure parsing for Path is scheduled,
// then block until the module is available and hand it out.
ThreadSafeModule ThinLtoModuleIndex::parseModuleFromFile(StringRef Path) {
  std::unique_lock<std::mutex> ScheduleLock(ScheduledModulesLock);
  scheduleModuleParsingPrelocked(Path);
  ScheduleLock.unlock();
  return takeModule(Path);
}
// Parse the IR file at Path into a fresh LLVMContext and wrap it as a
// ThreadSafeModule. Returns a descriptive error if parsing fails.
Expected<ThreadSafeModule> ThinLtoModuleIndex::doParseModule(StringRef Path) {
  // TODO: make a SMDiagnosticError class for this
  SMDiagnostic Err;
  auto Ctx = std::make_unique<LLVMContext>();
  auto M = parseIRFile(Path, Err, *Ctx);
  if (!M) {
    std::string ErrDescription;
    {
      raw_string_ostream S(ErrDescription);
      Err.print("ThinLtoJIT", S);
    }
    // StringRef::data() is not guaranteed to be null-terminated, so it must
    // not be fed to a printf-style "%s"; materialize a std::string first.
    return createStringError(inconvertibleErrorCode(),
                             "Failed to load module from file '%s' (%s)",
                             Path.str().c_str(), ErrDescription.c_str());
  }
  return ThreadSafeModule(std::move(M), std::move(Ctx));
}
// We don't filter visited functions. Discovery will often be retriggered
// from the middle of already visited functions and it aims to reach a little
// further each time.
void ThinLtoModuleIndex::discoverCalleeModulePaths(FunctionSummary *S,
                                                   unsigned LookaheadLevels) {
  // Seed the breadth-first traversal with the direct callees of S.
  std::vector<FunctionSummary *> Frontier;
  addToWorklist(Frontier, S->calls());

  unsigned Distance = 0;
  while (++Distance < LookaheadLevels) {
    // Rank the current frontier and expand it one call-graph level further.
    std::vector<FunctionSummary *> NextFrontier;
    for (FunctionSummary *Callee : Frontier) {
      updatePathRank(Callee->modulePath(), Distance);
      addToWorklist(NextFrontier, Callee->calls());
    }
    Frontier = std::move(NextFrontier);
  }

  // The final frontier is ranked, but not expanded any further.
  for (FunctionSummary *Callee : Frontier)
    updatePathRank(Callee->modulePath(), Distance);

  // Reset counts for known paths (covers both scheduled and parsed modules).
  std::lock_guard<std::mutex> Lock(ScheduledModulesLock);
  for (const auto &KV : ScheduledModules)
    PathRank[KV.first()].Count = 0;
}
// Append the resolved callee summaries of the given call edges to List.
// Edges whose callee has no summary (e.g. external symbols) are skipped.
void ThinLtoModuleIndex::addToWorklist(
    std::vector<FunctionSummary *> &List,
    ArrayRef<FunctionSummary::EdgeTy> Calls) {
  for (const auto &Edge : Calls) {
    const auto &Summaries = Edge.first.getSummaryList();
    if (Summaries.empty())
      continue;
    GlobalValueSummary *Base = Summaries.front()->getBaseObject();
    assert(isa<FunctionSummary>(Base) && "Callees must be functions");
    List.push_back(cast<FunctionSummary>(Base));
  }
}
// PathRank is global and continuous.
// Bump the hit count for Path and track the smallest distance at which it was
// reached. Distance must be positive, as selectNextPaths() divides by it.
void ThinLtoModuleIndex::updatePathRank(StringRef Path, unsigned Distance) {
  auto &Entry = PathRank[Path];
  ++Entry.Count;
  if (Distance < Entry.MinDist)
    Entry.MinDist = Distance;
  assert(Entry.MinDist > 0 && "We want it as a divisor");
}
// TODO: The size of a ThreadPool's task queue is not accessible. It would
// be great to know in order to estimate how many modules we schedule. The
// more we schedule, the less precise is the ranking. The less we schedule,
// the higher the risk for downtime.
std::vector<std::string> ThinLtoModuleIndex::selectNextPaths() {
  struct ScorePath {
    float Score;
    unsigned MinDist;
    StringRef Path;
  };

  // Collect every path that accumulated any rank at all. The score rewards
  // frequently hit paths and penalizes distant ones.
  std::vector<ScorePath> Candidates;
  Candidates.reserve(PathRank.size());
  for (const auto &KV : PathRank) {
    float Score = static_cast<float>(KV.second.Count) / KV.second.MinDist;
    if (Score > .0f)
      Candidates.push_back({Score, KV.second.MinDist, KV.first()});
  }

  // Order all candidates by descending score.
  std::sort(Candidates.begin(), Candidates.end(),
            [](const ScorePath &LHS, const ScorePath &RHS) {
              return LHS.Score > RHS.Score;
            });

  // Keep the better half, but at least one per parser thread and never more
  // than we actually have.
  size_t Selected =
      std::min(std::max<size_t>(NumParseModuleThreads, Candidates.size() / 2),
               Candidates.size());

  // Among the winners, prefer those closest to the point of discovery.
  std::sort(Candidates.begin(), Candidates.begin() + Selected,
            [](const ScorePath &LHS, const ScorePath &RHS) {
              return LHS.MinDist < RHS.MinDist;
            });

  // Materialize owned strings for the selected prefix.
  std::vector<std::string> Paths;
  Paths.reserve(Selected);
  for (size_t Idx = 0; Idx < Selected; ++Idx)
    Paths.push_back(Candidates[Idx].Path.str());

  LLVM_DEBUG(dbgs() << "ModuleIndex: select " << Paths.size() << " out of "
                    << Candidates.size() << " discovered paths\n");
  return Paths;
}
// Count the paths in PathRank that have a positive hit count, i.e. modules
// discovered by lookahead so far.
unsigned ThinLtoModuleIndex::getNumDiscoveredModules() const {
  // TODO: It would probably be more efficient to track the number of
  // unscheduled modules.
  unsigned Discovered = 0;
  for (const auto &KV : PathRank)
    Discovered += (KV.second.Count > 0) ? 1 : 0;
  return Discovered;
}
} // namespace orc
} // namespace llvm