// Tokenizer.d.ts 5.56 KB
/**
 * All the states the tokenizer can be in.
 *
 * Declared as an ambient `const enum` whose members carry explicit,
 * contiguous numeric values from 1 (`Text`) to 66 (`InHexEntity`).
 *
 * NOTE(review): ambient `const enum`s can be problematic for consumers that
 * compile with `isolatedModules` or certain bundlers — confirm this is
 * acceptable for downstream users.
 *
 * The section comments below are inferred from member names only; confirm
 * against the tokenizer implementation before relying on them.
 */
declare const enum State {
    // Plain text and tag names.
    Text = 1,
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,
    // Attributes (Dq / Sq / Nq presumably mean double-, single- and no-quote values).
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,
    // Declarations and processing instructions.
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,
    // Comments.
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,
    // CDATA sections (numbered members presumably step through a fixed character sequence).
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,
    // Special tags whose names start with "s" (script / style, judging by the members below).
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,
    // Special tags whose names start with "t" (title, judging by the members below).
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,
    // Character entities.
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * Handlers supplied to the `Tokenizer` constructor as its `cbs` argument.
 *
 * Every member is a required method returning `void`. This declaration file
 * does not show when each handler fires; the per-member notes below are
 * inferred from names and should be confirmed against the implementation.
 */
export interface Callbacks {
    /** Receives attribute value text (presumably possibly in several pieces — TODO confirm). */
    onattribdata(value: string): void;
    /**
     * Signals the end of an attribute.
     *
     * @param quote A string, `undefined`, or `null`. Presumably identifies the
     * quoting used for the value — TODO confirm what each case means.
     */
    onattribend(quote: string | undefined | null): void;
    /** Receives an attribute name. */
    onattribname(name: string): void;
    /** Receives the content of a CDATA section. */
    oncdata(data: string): void;
    /** Receives the name of a closing tag. */
    onclosetag(name: string): void;
    /** Receives the content of a comment. */
    oncomment(data: string): void;
    /** Receives the content of a declaration. */
    ondeclaration(content: string): void;
    /** Called with no arguments; presumably once tokenizing has finished — TODO confirm. */
    onend(): void;
    /**
     * Reports an error.
     *
     * @param error The error being reported.
     * @param state Optional tokenizer state; presumably the state at the time of the error — TODO confirm.
     */
    onerror(error: Error, state?: State): void;
    /** Called with no arguments; presumably when an opening tag has been fully read — TODO confirm. */
    onopentagend(): void;
    /** Receives the name of an opening tag. */
    onopentagname(name: string): void;
    /** Receives the content of a processing instruction. */
    onprocessinginstruction(instruction: string): void;
    /** Called with no arguments; presumably when a self-closing tag is encountered — TODO confirm. */
    onselfclosingtag(): void;
    /** Receives text content. */
    ontext(value: string): void;
}
/**
 * Type declaration for the tokenizer class (default export of this module).
 *
 * Input is supplied as string chunks through `write` / `end`, and results are
 * reported through the `Callbacks` object given to the constructor. Only the
 * members without a `private` modifier are part of the public surface; the
 * private members are listed without types, as is usual in generated
 * declaration files.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, e.g. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /**
     * For special parsing behavior inside of script and style tags.
     *
     * NOTE(review): `State` also declares title-related members
     * (`BeforeTitle1` … `AfterTitle4`), so title tags are presumably covered
     * as well — confirm against the implementation.
     */
    private special;
    /** Indicates whether the tokenizer has been paused. */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    /** The callbacks object; presumably the `cbs` constructor argument — confirm. */
    private readonly cbs;
    /** Presumably mirrors the `xmlMode` constructor option — confirm. */
    private readonly xmlMode;
    /** Presumably mirrors the `decodeEntities` constructor option — confirm. */
    private readonly decodeEntities;
    /**
     * @param options Tokenizer options, or `null`. Both `xmlMode` and
     * `decodeEntities` are optional; their defaults are not visible in this
     * declaration file.
     * @param cbs Handlers that receive the tokenizer's output.
     */
    constructor(options: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    } | null, cbs: Callbacks);
    /** Resets the tokenizer (presumably back to its initial state — TODO confirm exactly which fields are cleared). */
    reset(): void;
    /** Supplies a chunk of input to the tokenizer. */
    write(chunk: string): void;
    /**
     * Signals the end of input.
     *
     * @param chunk Optional final chunk; presumably handled like a last `write` before finishing — TODO confirm.
     */
    end(chunk?: string): void;
    /** Pauses the tokenizer (see the private `running` flag). */
    pause(): void;
    /** Resumes the tokenizer after a pause. */
    resume(): void;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    // Per-state handlers; names suggest one handler per `State` member or group of members.
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    // Entity-decoding helpers and handlers.
    private parseFixedEntity;
    private parseLegacyEntity;
    private stateInNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    private handleTrailingData;
    private getSection;
    private emitToken;
    private emitPartial;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map