Tokenizer.d.ts
5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/**
* Numeric enum describing how an attribute was quoted. It is the type of the
* `quote` argument of `Callbacks.onattribend`.
*
* NOTE(review): the per-member descriptions below are inferred from the member
* names only; the implementation is not visible from this declaration — confirm.
*/
export declare enum QuoteType {
/** Presumably: the attribute had no value at all. */
NoValue = 0,
/** Presumably: the attribute value was written without surrounding quotes. */
Unquoted = 1,
/** Presumably: the attribute value was wrapped in single quotes. */
Single = 2,
/** Presumably: the attribute value was wrapped in double quotes. */
Double = 3
}
/**
* Handler methods that a consumer passes to the `Tokenizer` constructor
* (its `cbs` parameter). Every handler returns `void`.
*
* NOTE(review): this declaration only shows the signatures. The descriptions
* below are inferred from the handler and parameter names. In particular,
* `start` / `endIndex` are presumably positions within the written data
* (compare `Tokenizer.getIndex` / `Tokenizer.getSectionStart`), and whether
* `endIndex` is inclusive or exclusive is not visible here — confirm against
* the implementation.
*/
export interface Callbacks {
/** Presumably reports a run of attribute-value data located between `start` and `endIndex`. */
onattribdata(start: number, endIndex: number): void;
/** Presumably reports an entity found inside an attribute value, as a numeric code point. */
onattribentity(codepoint: number): void;
/** Presumably signals the end of an attribute; `quote` carries the `QuoteType` for its value. */
onattribend(quote: QuoteType, endIndex: number): void;
/** Presumably reports an attribute name located between `start` and `endIndex`. */
onattribname(start: number, endIndex: number): void;
/**
* Presumably reports a CDATA section.
* NOTE(review): the meaning of `endOffset` is not visible here — TODO confirm.
*/
oncdata(start: number, endIndex: number, endOffset: number): void;
/** Presumably reports the name of a closing tag located between `start` and `endIndex`. */
onclosetag(start: number, endIndex: number): void;
/**
* Presumably reports a comment.
* NOTE(review): the meaning of `endOffset` is not visible here — TODO confirm.
*/
oncomment(start: number, endIndex: number, endOffset: number): void;
/** Presumably reports a declaration located between `start` and `endIndex`. */
ondeclaration(start: number, endIndex: number): void;
/** Takes no arguments; presumably invoked once tokenizing has finished. */
onend(): void;
/** Presumably signals the end of an opening tag at `endIndex`. */
onopentagend(endIndex: number): void;
/** Presumably reports the name of an opening tag located between `start` and `endIndex`. */
onopentagname(start: number, endIndex: number): void;
/** Presumably reports a processing instruction located between `start` and `endIndex`. */
onprocessinginstruction(start: number, endIndex: number): void;
/** Presumably signals that a tag was self-closing, ending at `endIndex`. */
onselfclosingtag(endIndex: number): void;
/** Presumably reports a run of text located between `start` and `endIndex`. */
ontext(start: number, endIndex: number): void;
/** Presumably reports an entity found inside text, as a numeric code point. */
ontextentity(codepoint: number): void;
}
/**
* Declaration of the default-exported `Tokenizer` class.
*
* Public surface visible here: construct it with an options object and a
* `Callbacks` object, feed string chunks through `write`, and use `end`,
* `pause`, `resume` and `reset` to control it. `getIndex` and
* `getSectionStart` expose numeric positions, and `running` is a public
* boolean flag.
*
* The private members are listed without types (as emitted in a declaration
* file). Per the comment on `parse`, the tokenizer iterates through the buffer
* and calls the function corresponding to the current state; the `state*`
* members are presumably those per-state functions — confirm against the
* implementation.
*/
export default class Tokenizer {
/** Presumably holds the `Callbacks` object given to the constructor — confirm. */
private readonly cbs;
/** The current state the tokenizer is in. */
private state;
/** The read buffer. */
private buffer;
/** The beginning of the section that is currently being read. */
private sectionStart;
/** The index within the buffer that we are currently looking at. */
private index;
/** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
private baseState;
/** For special parsing behavior inside of script and style tags. */
private isSpecial;
/**
* Indicates whether the tokenizer has been paused.
* NOTE(review): presumably `false` while paused and `true` otherwise — confirm.
*/
running: boolean;
/** The offset of the current buffer. */
private offset;
/** Presumably stores the `xmlMode` constructor option — confirm. */
private readonly xmlMode;
/** Presumably stores the `decodeEntities` constructor option — confirm. */
private readonly decodeEntities;
/** NOTE(review): presumably the lookup structure used for named entities — not visible here, confirm. */
private readonly entityTrie;
/**
* Creates a tokenizer.
*
* The first argument is an options object with two optional boolean fields,
* `xmlMode` and `decodeEntities`; the second argument, `cbs`, is the
* `Callbacks` object.
*
* NOTE(review): the default values of the options and their exact effect are
* not visible in this declaration. `xmlMode` presumably selects XML rather
* than HTML rules (see the comment on `isTagStartChar`), and `decodeEntities`
* presumably enables entity decoding — confirm.
*/
constructor({ xmlMode, decodeEntities, }: {
xmlMode?: boolean;
decodeEntities?: boolean;
}, cbs: Callbacks);
/** Takes no arguments and returns nothing; presumably restores the tokenizer to its initial state — confirm. */
reset(): void;
/**
* Accepts a string `chunk`.
* NOTE(review): presumably adds it to the read buffer and continues tokenizing — confirm.
*/
write(chunk: string): void;
/** Takes no arguments; presumably signals that no more data will be written — confirm. */
end(): void;
/** Presumably pauses tokenizing (see `running`) — confirm. */
pause(): void;
/** Presumably resumes tokenizing after `pause` (see `running`) — confirm. */
resume(): void;
/**
* The current index within all of the written data.
*/
getIndex(): number;
/**
* The start of the current section.
*/
getSectionStart(): number;
private stateText;
private currentSequence;
private sequenceIndex;
private stateSpecialStartSequence;
/** Look for an end tag. For <title> tags, also decode entities. */
private stateInSpecialTag;
private stateCDATASequence;
/**
* When we wait for one specific character, we can speed things up
* by skipping through the buffer until we find it.
*
* @returns Whether the character was found.
*/
private fastForwardTo;
/**
* Comments and CDATA end with `-->` and `]]>`.
*
* Their common qualities are:
* - Their end sequences have a distinct character they start with.
* - That character is then repeated, so we have to check multiple repeats.
* - All characters but the start character of the sequence can be skipped.
*/
private stateInCommentLike;
/**
* HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
*
* XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
* We allow anything that wouldn't end the tag.
*/
private isTagStartChar;
private startSpecial;
private stateBeforeTagName;
private stateInTagName;
private stateBeforeClosingTagName;
private stateInClosingTagName;
private stateAfterClosingTagName;
private stateBeforeAttributeName;
private stateInSelfClosingTag;
private stateInAttributeName;
private stateAfterAttributeName;
private stateBeforeAttributeValue;
private handleInAttributeValue;
private stateInAttributeValueDoubleQuotes;
private stateInAttributeValueSingleQuotes;
private stateInAttributeValueNoQuotes;
private stateBeforeDeclaration;
private stateInDeclaration;
private stateInProcessingInstruction;
private stateBeforeComment;
private stateInSpecialComment;
private stateBeforeSpecialS;
private trieIndex;
private trieCurrent;
/** For named entities, the index of the value. For numeric entities, the code point. */
private entityResult;
private entityExcess;
private stateBeforeEntity;
private stateInNamedEntity;
private emitNamedEntity;
private stateBeforeNumericEntity;
private emitNumericEntity;
private stateInNumericEntity;
private stateInHexEntity;
private allowLegacyEntity;
/**
* Remove data that has already been consumed from the buffer.
*/
private cleanup;
private shouldContinue;
/**
* Iterates through the buffer, calling the function corresponding to the current state.
*
* States that are more likely to be hit are higher up, as a performance improvement.
*/
private parse;
private finish;
/** Handle any trailing data. */
private handleTrailingData;
private emitPartial;
private emitCodePoint;
}
//# sourceMappingURL=Tokenizer.d.ts.map