Format.cpp
15.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
//===--- Format.cpp -----------------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Format.h"
#include "Logger.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Unicode.h"
namespace clang {
namespace clangd {
namespace {
/// Append closing brackets )]} to \p Code to make it well-formed.
/// Clang-format conservatively refuses to format files with unmatched brackets
/// as it isn't sure where the errors are and so can't correct.
/// When editing, it's reasonable to assume code before the cursor is complete.
void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
SourceManagerForFile FileSM("dummy.cpp", Code);
auto &SM = FileSM.get();
FileID FID = SM.getMainFileID();
Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
Token Tok;
std::vector<char> Brackets;
while (!Lex.LexFromRawLexer(Tok)) {
switch(Tok.getKind()) {
case tok::l_paren:
Brackets.push_back(')');
break;
case tok::l_brace:
Brackets.push_back('}');
break;
case tok::l_square:
Brackets.push_back(']');
break;
case tok::r_paren:
if (!Brackets.empty() && Brackets.back() == ')')
Brackets.pop_back();
break;
case tok::r_brace:
if (!Brackets.empty() && Brackets.back() == '}')
Brackets.pop_back();
break;
case tok::r_square:
if (!Brackets.empty() && Brackets.back() == ']')
Brackets.pop_back();
break;
default:
continue;
}
}
// Attempt to end any open comments first.
Code.append("\n// */\n");
Code.append(Brackets.rbegin(), Brackets.rend());
}
static StringRef commentMarker(llvm::StringRef Line) {
for (StringRef Marker : {"///", "//"}){
auto I = Line.rfind(Marker);
if (I != StringRef::npos)
return Line.substr(I, Marker.size());
}
return "";
}
llvm::StringRef firstLine(llvm::StringRef Code) {
return Code.take_until([](char C) { return C == '\n'; });
}
llvm::StringRef lastLine(llvm::StringRef Code) {
llvm::StringRef Rest = Code;
while (!Rest.empty() && Rest.back() != '\n')
Rest = Rest.drop_back();
return Code.substr(Rest.size());
}
// Filename is needed for tooling::Replacement and some overloads of reformat().
// Its value should not affect the outcome. We use the default from reformat().
llvm::StringRef Filename = "<stdin>";
// tooling::Replacement from overlapping StringRefs: From must be part of Code.
tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
llvm::StringRef To) {
assert(From.begin() >= Code.begin() && From.end() <= Code.end());
// The filename is required but ignored.
return tooling::Replacement(Filename, From.data() - Code.data(),
From.size(), To);
}
// High-level representation of incremental formatting changes.
// The changes are made in two steps.
// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
// comment markers when splitting a line comment with a newline).
// 2) a selective clang-format run:
// - the "source code" passed to clang format is the code up to the cursor,
// a placeholder for the cursor, and some closing brackets
// - the formatting is restricted to the cursor and (possibly) other ranges
// (e.g. the old line when inserting a newline).
// - changes before the cursor are applied, those after are discarded.
struct IncrementalChanges {
// Changes that should be applied before running clang-format.
tooling::Replacements Changes;
// Ranges of the original source code that should be clang-formatted.
// The CursorProxyText will also be formatted.
std::vector<tooling::Range> FormatRanges;
// The source code that should stand in for the cursor when clang-formatting.
// e.g. after inserting a newline, a line-comment at the cursor is used to
// ensure that the newline is preserved.
std::string CursorPlaceholder;
};
// After a newline:
// - we continue any line-comment that was split
// - we format the old line in addition to the cursor
// - we represent the cursor with a line comment to preserve the newline
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
unsigned Cursor) {
IncrementalChanges Result;
// Before newline, code looked like:
// leading^trailing
// After newline, code looks like:
// leading
// indentation^trailing
// Where indentation was added by the editor.
StringRef Trailing = firstLine(Code.substr(Cursor));
StringRef Indentation = lastLine(Code.take_front(Cursor));
if (Indentation.data() == Code.data()) {
vlog("Typed a newline, but we're still on the first line!");
return Result;
}
StringRef Leading =
lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
// Strip leading whitespace on trailing line.
StringRef TrailingTrim = Trailing.ltrim();
if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
cantFail(Result.Changes.add(
replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
// If we split a comment, replace indentation with a comment marker.
// If the editor made the new line a comment, also respect that.
StringRef CommentMarker = commentMarker(Leading);
bool NewLineIsComment = !commentMarker(Indentation).empty();
if (!CommentMarker.empty() &&
(NewLineIsComment || !commentMarker(NextLine).empty() ||
(!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
using llvm::sys::unicode::columnWidthUTF8;
// We indent the new comment to match the previous one.
StringRef PreComment =
Leading.take_front(CommentMarker.data() - Leading.data());
std::string IndentAndComment =
(std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
.str();
cantFail(
Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
} else {
// Remove any indentation and let clang-format re-add it.
// This prevents the cursor marker dragging e.g. an aligned comment with it.
cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
}
// If we put a the newline inside a {} pair, put } on its own line...
if (CommentMarker.empty() && Leading.endswith("{") &&
Trailing.startswith("}")) {
cantFail(
Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
// ...and format it.
Result.FormatRanges.push_back(
tooling::Range(Trailing.data() - Code.data() + 1, 1));
}
// Format the whole leading line.
Result.FormatRanges.push_back(
tooling::Range(Leading.data() - Code.data(), Leading.size()));
// We use a comment to represent the cursor, to preserve the newline.
// A trailing identifier improves parsing of e.g. for without braces.
// Exception: if the previous line has a trailing comment, we can't use one
// as the cursor (they will be aligned). But in this case we don't need to.
Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
return Result;
}
IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
llvm::StringRef InsertedText) {
IncrementalChanges Result;
if (InsertedText == "\n")
return getIncrementalChangesAfterNewline(Code, Cursor);
Result.CursorPlaceholder = " /**/";
return Result;
}
// Returns equivalent replacements that preserve the correspondence between
// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
// replacement will be split.
std::vector<tooling::Replacement>
split(const tooling::Replacements &Replacements, unsigned OldCursor,
unsigned NewCursor) {
std::vector<tooling::Replacement> Result;
int LengthChange = 0;
for (const tooling::Replacement &R : Replacements) {
if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
Result.push_back(R);
LengthChange += R.getReplacementText().size() - R.getLength();
} else if (R.getOffset() < OldCursor) { // overlaps cursor
int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
assert(ReplacementSplit >= 0 &&
ReplacementSplit <= int(R.getReplacementText().size()) &&
"NewCursor incompatible with OldCursor!");
Result.push_back(tooling::Replacement(
R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
R.getReplacementText().take_front(ReplacementSplit)));
Result.push_back(tooling::Replacement(
R.getFilePath(), OldCursor,
R.getLength() - (OldCursor - R.getOffset()),
R.getReplacementText().drop_front(ReplacementSplit)));
} else if (R.getOffset() >= OldCursor) { // after cursor
Result.push_back(R);
}
}
return Result;
}
} // namespace
// We're simulating the following sequence of changes:
// - apply the pre-formatting edits (see getIncrementalChanges)
// - insert a placeholder for the cursor
// - format some of the resulting code
// - remove the cursor placeholder again
// The replacements we return are produced by composing these.
//
// The text we actually pass to clang-format is slightly different from this,
// e.g. we have to close brackets. We ensure these differences are *after*
// all the regions we want to format, and discard changes in them.
std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
llvm::StringRef InsertedText, format::FormatStyle Style) {
IncrementalChanges Incremental =
getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
// Never *remove* lines in response to pressing enter! This annoys users.
if (InsertedText == "\n") {
Style.MaxEmptyLinesToKeep = 1000;
Style.KeepEmptyLinesAtTheStartOfBlocks = true;
}
// Compute the code we want to format:
// 1) Start with code after the pre-formatting edits.
std::string CodeToFormat = cantFail(
tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
// 2) Truncate code after the last interesting range.
unsigned FormatLimit = Cursor;
for (tooling::Range &R : Incremental.FormatRanges)
FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
CodeToFormat.resize(FormatLimit);
// 3) Insert a placeholder for the cursor.
CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
// 4) Append brackets after FormatLimit so the code is well-formed.
closeBrackets(CodeToFormat, Style);
// Determine the ranges to format:
std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
// Ranges after the cursor need to be adjusted for the placeholder.
for (auto &R : RangesToFormat) {
if (R.getOffset() > Cursor)
R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
R.getLength());
}
// We also format the cursor.
RangesToFormat.push_back(
tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
// Also update FormatLimit for the placeholder, we'll use this later.
FormatLimit += Incremental.CursorPlaceholder.size();
// Run clang-format, and truncate changes at FormatLimit.
tooling::Replacements FormattingChanges;
format::FormattingAttemptStatus Status;
for (const tooling::Replacement &R : format::reformat(
Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
cantFail(FormattingChanges.add(R));
else if(R.getOffset() < FormatLimit) { // Overlaps limit.
if (R.getReplacementText().empty()) // Deletions are easy to handle.
cantFail(FormattingChanges.add(tooling::Replacement(Filename,
R.getOffset(), FormatLimit - R.getOffset(), "")));
else
// Hopefully won't happen in practice?
elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
Cursor, CodeToFormat);
}
}
if (!Status.FormatComplete)
vlog("Incremental format incomplete at line {0}", Status.Line);
// Now we are ready to compose the changes relative to OriginalCode.
// edits -> insert placeholder -> format -> remove placeholder.
// We must express insert/remove as Replacements.
tooling::Replacements InsertCursorPlaceholder(
tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
unsigned FormattedCursorStart =
FormattingChanges.getShiftedCodePosition(Cursor),
FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
Cursor + Incremental.CursorPlaceholder.size());
tooling::Replacements RemoveCursorPlaceholder(
tooling::Replacement(Filename, FormattedCursorStart,
FormattedCursorEnd - FormattedCursorStart, ""));
// We can't simply merge() and return: tooling::Replacements will combine
// adjacent edits left and right of the cursor. This gives the right source
// code, but loses information about where the cursor is!
// Fortunately, none of the individual passes lose information, so:
// - we use merge() to compute the final Replacements
// - we chain getShiftedCodePosition() to compute final cursor position
// - we split the final Replacements at the cursor position, so that
// each Replacement lies either before or after the cursor.
tooling::Replacements Final;
unsigned FinalCursor = OriginalCursor;
#ifndef NDEBUG
std::string FinalCode = OriginalCode;
dlog("Initial code: {0}", FinalCode);
#endif
for (auto Pass :
std::vector<std::pair<const char *, const tooling::Replacements *>>{
{"Pre-formatting changes", &Incremental.Changes},
{"Insert placeholder", &InsertCursorPlaceholder},
{"clang-format", &FormattingChanges},
{"Remove placeholder", &RemoveCursorPlaceholder}}) {
Final = Final.merge(*Pass.second);
FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
#ifndef NDEBUG
FinalCode =
cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
dlog("After {0}:\n{1}^{2}", Pass.first,
StringRef(FinalCode).take_front(FinalCursor),
StringRef(FinalCode).drop_front(FinalCursor));
#endif
}
return split(Final, OriginalCursor, FinalCursor);
}
unsigned
transformCursorPosition(unsigned Offset,
const std::vector<tooling::Replacement> &Replacements) {
unsigned OriginalOffset = Offset;
for (const auto &R : Replacements) {
if (R.getOffset() + R.getLength() <= OriginalOffset) {
// Replacement is before cursor.
Offset += R.getReplacementText().size();
Offset -= R.getLength();
} else if (R.getOffset() < OriginalOffset) {
// Replacement overlaps cursor.
// Preserve position within replacement text, as far as possible.
unsigned PositionWithinReplacement = Offset - R.getOffset();
if (PositionWithinReplacement > R.getReplacementText().size()) {
Offset += R.getReplacementText().size();
Offset -= PositionWithinReplacement;
}
} else {
// Replacement after cursor.
break; // Replacements are sorted, the rest are also after the cursor.
}
}
return Offset;
}
} // namespace clangd
} // namespace clang