Tokenizer.d.ts
5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/**
* All the states the tokenizer can be in.
*
* Declared as an ambient `const enum` whose 66 members are numbered
* consecutively from 1 (`Text`) to 66 (`InHexEntity`). It is the type of
* `Tokenizer._state` and of the optional `state` argument of
* `Callbacks.onerror`. The enum itself is not exported from this module.
*
* NOTE(review): the section comments below group members by name only; the
* actual transitions between states live in the implementation, which is not
* visible in this declaration file.
*/
declare const enum State {
// Plain text.
Text = 1,
// Tags: opening, self-closing and closing tag names.
BeforeTagName = 2,
InTagName = 3,
InSelfClosingTag = 4,
BeforeClosingTagName = 5,
InClosingTagName = 6,
AfterClosingTagName = 7,
// Attributes. The Dq / Sq / Nq suffixes line up with the tokenizer's
// DoubleQuotes / SingleQuotes / NoQuotes attribute-value handlers.
BeforeAttributeName = 8,
InAttributeName = 9,
AfterAttributeName = 10,
BeforeAttributeValue = 11,
InAttributeValueDq = 12,
InAttributeValueSq = 13,
InAttributeValueNq = 14,
// Declarations and processing instructions.
BeforeDeclaration = 15,
InDeclaration = 16,
InProcessingInstruction = 17,
// Comments.
BeforeComment = 18,
InComment = 19,
InSpecialComment = 20,
AfterComment1 = 21,
AfterComment2 = 22,
// CDATA sections.
// NOTE(review): numeric suffixes presumably count the characters matched so
// far of the opening / closing sequence - confirm against the implementation.
BeforeCdata1 = 23,
BeforeCdata2 = 24,
BeforeCdata3 = 25,
BeforeCdata4 = 26,
BeforeCdata5 = 27,
BeforeCdata6 = 28,
InCdata = 29,
AfterCdata1 = 30,
AfterCdata2 = 31,
// "Special" tags whose names start with "s": script and style.
BeforeSpecialS = 32,
BeforeSpecialSEnd = 33,
BeforeScript1 = 34,
BeforeScript2 = 35,
BeforeScript3 = 36,
BeforeScript4 = 37,
BeforeScript5 = 38,
AfterScript1 = 39,
AfterScript2 = 40,
AfterScript3 = 41,
AfterScript4 = 42,
AfterScript5 = 43,
BeforeStyle1 = 44,
BeforeStyle2 = 45,
BeforeStyle3 = 46,
BeforeStyle4 = 47,
AfterStyle1 = 48,
AfterStyle2 = 49,
AfterStyle3 = 50,
AfterStyle4 = 51,
// "Special" tags whose names start with "t": title.
BeforeSpecialT = 52,
BeforeSpecialTEnd = 53,
BeforeTitle1 = 54,
BeforeTitle2 = 55,
BeforeTitle3 = 56,
BeforeTitle4 = 57,
AfterTitle1 = 58,
AfterTitle2 = 59,
AfterTitle3 = 60,
AfterTitle4 = 61,
// Entities: named, numeric and hexadecimal.
BeforeEntity = 62,
BeforeNumericEntity = 63,
InNamedEntity = 64,
InNumericEntity = 65,
InHexEntity = 66
}
/**
* Handlers the tokenizer reports to. An implementation is passed as the
* `cbs` argument of the `Tokenizer` constructor.
*
* Every handler is required and returns nothing. When each handler fires is
* decided by the tokenizer implementation, which is not part of this
* declaration file; the notes below restate the signatures and flag anything
* that still needs confirming.
*/
export interface Callbacks {
/** Receives attribute value text. */
onattribdata(value: string): void;
/**
* Marks the end of an attribute.
* NOTE(review): `quote` may be a string, `undefined` or `null`; what each of
* these stands for is not visible here - confirm against the implementation.
*/
onattribend(quote: string | undefined | null): void;
/** Receives an attribute name. */
onattribname(name: string): void;
/** Receives the content of a CDATA section. */
oncdata(data: string): void;
/** Receives the name of a closing tag. */
onclosetag(name: string): void;
/** Receives the content of a comment. */
oncomment(data: string): void;
/** Receives the content of a declaration. */
ondeclaration(content: string): void;
/** Takes no arguments. NOTE(review): presumably fired once the tokenizer has ended - confirm. */
onend(): void;
/** Receives an error and, optionally, the tokenizer state associated with it. */
onerror(error: Error, state?: State): void;
/** Takes no arguments; by its name, marks the end of an opening tag. */
onopentagend(): void;
/** Receives the name of an opening tag. */
onopentagname(name: string): void;
/** Receives the content of a processing instruction. */
onprocessinginstruction(instruction: string): void;
/** Takes no arguments; by its name, reports a self-closing tag. */
onselfclosingtag(): void;
/** Receives text content. */
ontext(value: string): void;
}
/**
* Type declaration of the tokenizer class (the module's default export).
*
* Input is supplied as string chunks through `write` / `end`, and results are
* reported through the `Callbacks` object given to the constructor. Private
* members are listed without types, so only the public members below form a
* typed surface.
*/
export default class Tokenizer {
/** The current state the tokenizer is in. Public despite the underscore prefix. */
_state: State;
/** The read buffer. */
private buffer;
/** The beginning of the section that is currently being read. */
sectionStart: number;
/** The index within the buffer that we are currently looking at. Public despite the underscore prefix. */
_index: number;
/**
* Data that has already been processed will be removed from the buffer occasionally.
* `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
*/
private bufferOffset;
/** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
private baseState;
/** For special parsing behavior inside of script and style tags. */
private special;
/**
* Indicates whether the tokenizer has been paused.
* NOTE(review): the name suggests `true` means "not paused" - confirm against the implementation.
*/
private running;
/** Indicates whether the tokenizer has finished running / `.end` has been called. */
private ended;
/** The callbacks object; NOTE(review): presumably the `cbs` constructor argument - confirm. */
private readonly cbs;
/** NOTE(review): presumably mirrors `options.xmlMode` - confirm. */
private readonly xmlMode;
/** NOTE(review): presumably mirrors `options.decodeEntities` - confirm. */
private readonly decodeEntities;
/**
* @param options Optional settings; `null` is accepted. Both `xmlMode` and
* `decodeEntities` are optional booleans. NOTE(review): their defaults and
* exact effect are not visible in this declaration file.
* @param cbs Handlers the tokenizer reports to.
*/
constructor(options: {
xmlMode?: boolean;
decodeEntities?: boolean;
} | null, cbs: Callbacks);
/** NOTE(review): presumably restores the initial state so the instance can be reused - confirm. */
reset(): void;
/** Supplies a chunk of input as a string. */
write(chunk: string): void;
/** Ends the input; a final chunk may optionally be passed. */
end(chunk?: string): void;
/** Pauses the tokenizer (see `running`). */
pause(): void;
/** Counterpart of `pause`. NOTE(review): presumably continues processing buffered input - confirm. */
resume(): void;
/**
* The current index within all of the written data.
*/
getAbsoluteIndex(): number;
// Handlers named after individual states follow. According to the comment on
// `parse` below, the function corresponding to the current state is called
// while iterating through the buffer.
private stateText;
/**
* HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
*
* XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
* We allow anything that wouldn't end the tag.
*/
private isTagStartChar;
private stateBeforeTagName;
private stateInTagName;
private stateBeforeClosingTagName;
private stateInClosingTagName;
private stateAfterClosingTagName;
private stateBeforeAttributeName;
private stateInSelfClosingTag;
private stateInAttributeName;
private stateAfterAttributeName;
private stateBeforeAttributeValue;
private handleInAttributeValue;
private stateInAttributeValueDoubleQuotes;
private stateInAttributeValueSingleQuotes;
private stateInAttributeValueNoQuotes;
private stateBeforeDeclaration;
private stateInDeclaration;
private stateInProcessingInstruction;
private stateBeforeComment;
private stateInComment;
private stateInSpecialComment;
private stateAfterComment1;
private stateAfterComment2;
private stateBeforeCdata6;
private stateInCdata;
private stateAfterCdata1;
private stateAfterCdata2;
private stateBeforeSpecialS;
private stateBeforeSpecialSEnd;
private stateBeforeSpecialLast;
private stateAfterSpecialLast;
private parseFixedEntity;
private parseLegacyEntity;
private stateInNamedEntity;
private decodeNumericEntity;
private stateInNumericEntity;
private stateInHexEntity;
private cleanup;
/**
* Iterates through the buffer, calling the function corresponding to the current state.
*
* States that are more likely to be hit are higher up, as a performance improvement.
*/
private parse;
private finish;
private handleTrailingData;
private getSection;
private emitToken;
private emitPartial;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map