encode.js
4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"use strict";
// Interop helper (appears to be compiler-generated): returns `mod` unchanged
// when it carries a truthy `__esModule` flag, otherwise wraps it as
// `{ default: mod }` so callers can always read `.default`.
// An existing `this.__importDefault` is reused when one is available.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this module's export object with `__esModule`.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every export as undefined; the real values are assigned below.
exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
// XML entity table; getInverseObj treats it as a map from entity name to the
// text that entity stands for.
var xml_json_1 = __importDefault(require("./maps/xml.json"));
// Inverted table: text -> "&name;".
var inverseXML = getInverseObj(xml_json_1.default);
// Global RegExp matching every key of the inverted XML table.
var xmlReplacer = getInverseReplacer(inverseXML);
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
*/
exports.encodeXML = getASCIIEncoder(inverseXML);
// HTML entity table; getInverseObj treats it as a map from entity name to the
// text that entity stands for.
var entities_json_1 = __importDefault(require("./maps/entities.json"));
// Inverted table: text -> "&name;".
var inverseHTML = getInverseObj(entities_json_1.default);
// Global RegExp matching every key of the inverted HTML table.
var htmlReplacer = getInverseReplacer(inverseHTML);
/**
* Encodes all entities and non-ASCII characters in the input.
*
* This includes characters that are valid ASCII characters in HTML documents.
* For example `#` will be encoded as `&num;`. To get a more compact output,
* consider using the `encodeNonAsciiHTML` function.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
*/
exports.encodeHTML = getInverse(inverseHTML, htmlReplacer);
/**
* Encodes all non-ASCII characters, as well as characters not valid in HTML
* documents using HTML entities.
*
* Only the characters matched by `reEscapeChars` are touched; each one is
* replaced by its named HTML entity when the inverted table has one.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xFC;`) will be used.
*/
exports.encodeNonAsciiHTML = getASCIIEncoder(inverseHTML);
/**
 * Inverts an entity table.
 *
 * Every `name -> text` pair of `obj` becomes `text -> "&name;"` in the result.
 * Names are visited in sorted order, so when several names share the same
 * text the name that sorts last is the one that is kept.
 *
 * @param obj Map from entity name to the text it represents.
 * @returns Map from text to the full entity reference.
 */
function getInverseObj(obj) {
    var names = Object.keys(obj).sort();
    var inverse = {};
    for (var i = 0; i < names.length; i++) {
        var name = names[i];
        inverse[obj[name]] = "&" + name + ";";
    }
    return inverse;
}
/**
 * Builds a global RegExp that matches any key of `inverse`.
 *
 * Single-character keys are collected into one character class, with runs of
 * three or more consecutive code units collapsed into a range. Keys longer
 * than one character become literal alternatives after the class, so a
 * single-character key still wins over a longer key starting with it.
 *
 * Keys are treated as literal text: only characters that carry a special
 * meaning are escaped. Blindly prefixing every character with a backslash
 * would turn alphanumeric keys into regex escapes (`d` -> `\d`, `n` -> newline).
 *
 * @param inverse Map whose keys are the strings to match.
 * @returns RegExp with the `g` flag set.
 */
function getInverseReplacer(inverse) {
    // Characters that are special inside a character class.
    var classSpecial = /[\\\]^-]/;
    // Characters that are special in a pattern outside a character class.
    var patternSpecial = /[\\^$.*+?()[\]{}|]/g;
    function classAtom(code) {
        var ch = String.fromCharCode(code);
        return classSpecial.test(ch) ? "\\" + ch : ch;
    }
    var codes = [];
    var multiple = [];
    var keys = Object.keys(inverse);
    for (var i = 0; i < keys.length; i++) {
        var key = keys[i];
        if (key.length === 1) {
            codes.push(key.charCodeAt(0));
        }
        else {
            multiple.push(key.replace(patternSpecial, "\\$&"));
        }
    }
    // Numeric order is required to detect runs of adjacent code units.
    codes.sort(function (a, b) { return a - b; });
    var classBody = "";
    var start = 0;
    while (start < codes.length) {
        // Find the end of a run of consecutive code units.
        var end = start;
        while (end + 1 < codes.length && codes[end] + 1 === codes[end + 1]) {
            end += 1;
        }
        if (end - start >= 2) {
            // Only collapse runs of at least three characters into a range.
            classBody += classAtom(codes[start]) + "-" + classAtom(codes[end]);
            start = end + 1;
        }
        else {
            classBody += classAtom(codes[start]);
            start += 1;
        }
    }
    multiple.unshift("[" + classBody + "]");
    return new RegExp(multiple.join("|"), "g");
}
// Equivalent of /[^\0-\x7F]/gu for engines without the `u` flag: a valid
// surrogate pair is matched as one unit, any other non-ASCII code unit
// (including a lone surrogate) is matched on its own. Nothing before or after
// the character is consumed, so every match is exactly one code point.
var reNonASCII = /(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[\x80-\uFFFF])/g;
/**
 * Returns the code point at the start of a string.
 *
 * Uses the native `String.prototype.codePointAt` when it exists. The fallback
 * combines the first two UTF-16 code units with the surrogate formula from
 * http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
 * and therefore expects its argument to start with a surrogate pair.
 */
var getCodePoint;
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (String.prototype.codePointAt != null) {
    getCodePoint = function (str) {
        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
        return str.codePointAt(0);
    };
}
else {
    getCodePoint = function (c) {
        var high = c.charCodeAt(0);
        var low = c.charCodeAt(1);
        return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
    };
}
/**
 * Turns one matched character into a hexadecimal numeric character reference
 * with uppercase digits (e.g. `ü` becomes `&#xFC;`).
 *
 * Matches longer than one UTF-16 code unit are resolved with `getCodePoint`;
 * shorter ones use their single code unit directly.
 *
 * @param c The matched text.
 */
function singleCharReplacer(c) {
    var codePoint;
    if (c.length > 1) {
        codePoint = getCodePoint(c);
    }
    else {
        codePoint = c.charCodeAt(0);
    }
    var hex = codePoint.toString(16).toUpperCase();
    return "&#x" + hex + ";";
}
/**
 * Creates an encoder that works in two passes: every match of `re` is first
 * replaced by its entry in `inverse`, then whatever `reNonASCII` still matches
 * is replaced by a numeric reference via `singleCharReplacer`.
 *
 * @param inverse Map from text to its entity reference.
 * @param re RegExp matching the keys of `inverse`.
 * @returns Function encoding a string.
 */
function getInverse(inverse, re) {
    function lookupEntity(match) {
        return inverse[match];
    }
    return function (data) {
        var withNamedEntities = data.replace(re, lookupEntity);
        return withNamedEntities.replace(reNonASCII, singleCharReplacer);
    };
}
// Matches everything `xmlReplacer` matches plus everything `reNonASCII`
// matches; the XML alternatives come first.
var reEscapeChars = new RegExp([xmlReplacer.source, reNonASCII.source].join("|"), "g");
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using numeric hexadecimal reference (eg. `&#xFC;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the expense
 * of reduced transportability.
 *
 * @param data String to escape.
 */
function escape(data) {
    var escaped = data.replace(reEscapeChars, singleCharReplacer);
    return escaped;
}
// Publish `escape` on the module's export object.
exports.escape = escape;
/**
 * Encodes all characters not valid in XML documents using numeric hexadecimal
 * reference (eg. `&#xFC;`). Non-ASCII characters are left as they are.
 *
 * Note that the output will be character-set dependent.
 *
 * @param data String to escape.
 */
function escapeUTF8(data) {
    var escaped = data.replace(xmlReplacer, singleCharReplacer);
    return escaped;
}
// Publish `escapeUTF8` on the module's export object.
exports.escapeUTF8 = escapeUTF8;
/**
 * Creates an encoder that replaces every match of `reEscapeChars` with its
 * entry in `obj`, falling back to a numeric reference from
 * `singleCharReplacer` when `obj` has no (truthy) entry for the match.
 *
 * @param obj Map from text to its entity reference.
 * @returns Function encoding a string.
 */
function getASCIIEncoder(obj) {
    function encodeMatch(c) {
        var entity = obj[c];
        return entity ? entity : singleCharReplacer(c);
    }
    return function (data) {
        return data.replace(reEscapeChars, encodeMatch);
    };
}