// parser_feedback_simulator.js
'use strict';
var Tokenizer = require('../tokenizer'),
foreignContent = require('../common/foreign_content'),
UNICODE = require('../common/unicode'),
HTML = require('../common/html');
//Aliases
var $ = HTML.TAG_NAMES,
NS = HTML.NAMESPACES;
//ParserFeedbackSimulator
//Simulates the adjustments that a standard parser applies to the Tokenizer during tree construction.
//
//tokenizer - object that produces tokens via getNextToken(); its `state` and `allowCDATA`
//            properties are written by the simulator as tags are observed.
var ParserFeedbackSimulator = module.exports = function (tokenizer) {
    this.tokenizer = tokenizer;

    //Stack of namespaces entered so far; namespaceStackTop is the index of its last entry.
    this.namespaceStack = [];
    this.namespaceStackTop = -1;

    //NOTE: initialised explicitly so the flag is a real boolean from the start
    //instead of reading as `undefined` until the first <pre>, <textarea> or <listing>.
    this.skipNextNewLine = false;

    //The bottom entry of the stack is always the HTML namespace.
    this._enterNamespace(NS.HTML);
};
//Pulls the next token from the wrapped tokenizer and applies parser-like feedback to it:
// * start and end tag tokens are routed to their handlers;
// * a NULL character token seen in foreign content is turned into a character token
//   carrying the Unicode replacement character;
// * a leading line feed is stripped from the whitespace token that immediately follows
//   a tag which armed `skipNextNewLine`.
//Returns the (possibly mutated) token. A whitespace token consisting of a single
//line feed is swallowed entirely and the following token is returned instead.
ParserFeedbackSimulator.prototype.getNextToken = function () {
    var token = this.tokenizer.getNextToken(),
        type = token.type,
        skipNewLine = this.skipNextNewLine;

    //NOTE: the skip applies only to the token that directly follows the arming tag, so the
    //flag is consumed by every token, tags included. The only exception is the hibernation
    //token, which leaves the flag untouched (presumably because it is not real input).
    //The reset happens before the start tag handler runs, because that handler may re-arm it.
    if (type !== Tokenizer.HIBERNATION_TOKEN)
        this.skipNextNewLine = false;

    if (type === Tokenizer.START_TAG_TOKEN)
        this._handleStartTagToken(token);

    else if (type === Tokenizer.END_TAG_TOKEN)
        this._handleEndTagToken(token);

    else if (type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
        token.type = Tokenizer.CHARACTER_TOKEN;
        token.chars = UNICODE.REPLACEMENT_CHARACTER;
    }

    else if (skipNewLine && type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
        //The token was nothing but the line feed: drop it and hand out the next one.
        if (token.chars.length === 1)
            return this.getNextToken();

        token.chars = token.chars.slice(1);
    }

    return token;
};
//Namespace stack mutations
//Pushes `namespace` onto the namespace stack and makes it the current one.
//Any namespace other than HTML counts as foreign content; CDATA sections are
//allowed on the tokenizer exactly while foreign content is active.
ParserFeedbackSimulator.prototype._enterNamespace = function (namespace) {
    var isForeign = namespace !== NS.HTML;

    this.namespaceStackTop++;
    this.namespaceStack.push(namespace);

    this.currentNamespace = namespace;
    this.inForeignContent = isForeign;
    this.tokenizer.allowCDATA = isForeign;
};
//Pops the current namespace and restores the one beneath it, recomputing the
//foreign-content flag and the tokenizer's CDATA permission for the restored namespace.
ParserFeedbackSimulator.prototype._leaveCurrentNamespace = function () {
    this.namespaceStackTop--;
    this.namespaceStack.pop();

    var restoredNs = this.namespaceStack[this.namespaceStackTop],
        isForeign = restoredNs !== NS.HTML;

    this.currentNamespace = restoredNs;
    this.inForeignContent = isForeign;
    this.tokenizer.allowCDATA = isForeign;
};
//Token handlers
//Switches the tokenizer state according to the HTML start tag name `tn`:
//  textarea, title                                      -> RCDATA
//  plaintext                                            -> PLAINTEXT
//  script                                               -> SCRIPT_DATA
//  style, iframe, xmp, noembed, noframes, noscript      -> RAWTEXT
//Any other tag name leaves the tokenizer state untouched.
ParserFeedbackSimulator.prototype._ensureTokenizerMode = function (tn) {
    switch (tn) {
        case $.TEXTAREA:
        case $.TITLE:
            this.tokenizer.state = Tokenizer.MODE.RCDATA;
            break;

        case $.PLAINTEXT:
            this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
            break;

        case $.SCRIPT:
            this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
            break;

        case $.STYLE:
        case $.IFRAME:
        case $.XMP:
        case $.NOEMBED:
        case $.NOFRAMES:
        case $.NOSCRIPT:
            this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
            break;
    }
};
//Applies parser feedback for a start tag token (the token is mutated in place):
// * <svg> / <math> open the SVG / MathML namespace;
// * in foreign content the tag name and attributes are adjusted for the current
//   namespace, and a non-self-closing integration point opens a nested HTML namespace;
// * a tag that breaks out of foreign content unwinds to the nearest HTML namespace
//   and is then treated as an ordinary HTML start tag;
// * in HTML content <pre>, <textarea> and <listing> arm the newline skip, <image>
//   is renamed to <img>, and the tokenizer mode is switched where the tag requires it.
ParserFeedbackSimulator.prototype._handleStartTagToken = function (token) {
    var tn = token.tagName;

    if (tn === $.SVG)
        this._enterNamespace(NS.SVG);

    else if (tn === $.MATH)
        this._enterNamespace(NS.MATHML);

    if (this.inForeignContent) {
        if (!foreignContent.causesExit(token)) {
            var currentNs = this.currentNamespace;

            if (currentNs === NS.MATHML)
                foreignContent.adjustTokenMathMLAttrs(token);

            else if (currentNs === NS.SVG) {
                foreignContent.adjustTokenSVGTagName(token);
                foreignContent.adjustTokenSVGAttrs(token);
            }

            foreignContent.adjustTokenXMLAttrs(token);

            //NOTE: the SVG adjustment may have changed the tag name, so re-read it.
            tn = token.tagName;

            if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs))
                this._enterNamespace(NS.HTML);

            return;
        }

        //NOTE: a breakout tag closes every nested foreign root (e.g. <svg><svg><p>), not just
        //the innermost one. The bottom stack entry is never popped.
        while (this.inForeignContent && this.namespaceStackTop > 0)
            this._leaveCurrentNamespace();

        //NOTE: the token is then processed as a regular HTML start tag below,
        //so that e.g. <svg><pre> still arms the newline skip.
    }

    if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING)
        this.skipNextNewLine = true;

    else if (tn === $.IMAGE)
        token.tagName = $.IMG;

    this._ensureTokenizerMode(tn);
};
//Applies parser feedback for an end tag token:
// * in HTML content, an end tag that matches an integration point of the enclosing
//   namespace closes the nested HTML namespace;
// * in foreign content, </svg> (while in SVG) or </math> (while in MathML) closes
//   the current foreign namespace;
// * if the namespace that is current afterwards is SVG, the token's tag name is
//   adjusted to its SVG spelling (the token is mutated in place).
ParserFeedbackSimulator.prototype._handleEndTagToken = function (token) {
    var tn = token.tagName;

    if (!this.inForeignContent) {
        //Namespace directly beneath the current one; undefined at the bottom of the stack.
        var previousNs = this.namespaceStack[this.namespaceStackTop - 1],
            svgTagNames = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP;

        //NOTE: own-property check, so that tag names such as "constructor" are not
        //resolved to members inherited from Object.prototype.
        if (previousNs === NS.SVG && Object.prototype.hasOwnProperty.call(svgTagNames, tn) && svgTagNames[tn])
            tn = svgTagNames[tn];

        //NOTE: check for exit from integration point
        if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs))
            this._leaveCurrentNamespace();
    }

    else if (tn === $.SVG && this.currentNamespace === NS.SVG ||
             tn === $.MATH && this.currentNamespace === NS.MATHML)
        this._leaveCurrentNamespace();

    // NOTE: adjust end tag name as well for consistency
    if (this.currentNamespace === NS.SVG)
        foreignContent.adjustTokenSVGTagName(token);
};