Skip to content

Commit 074f3c1

Browse files
author
Robert Jackson
authored
Merge pull request #71 from rwjblue/doctype
2 parents 220bf73 + c3223ab commit 074f3c1

File tree

4 files changed

+423
-1
lines changed

4 files changed

+423
-1
lines changed

src/evented-tokenizer.ts

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,194 @@ export default class EventedTokenizer {
185185
this.consume();
186186
this.transitionTo(TokenizerState.commentStart);
187187
this.delegate.beginComment();
188+
} else {
189+
let maybeDoctype = char.toUpperCase() + this.input.substring(this.index, this.index + 6).toUpperCase();
190+
191+
if (maybeDoctype === 'DOCTYPE') {
192+
this.consume();
193+
this.consume();
194+
this.consume();
195+
this.consume();
196+
this.consume();
197+
this.consume();
198+
this.transitionTo(TokenizerState.doctype);
199+
if (this.delegate.beginDoctype) this.delegate.beginDoctype();
200+
}
201+
}
202+
},
203+
204+
/**
 * State reached once the `DOCTYPE` keyword itself has been consumed.
 *
 * Takes one character from the input. Whitespace moves the tokenizer to
 * `beforeDoctypeName`; any other character is consumed and the state is
 * left unchanged.
 */
doctype() {
  const next = this.consume();

  if (!isSpace(next)) {
    return;
  }

  this.transitionTo(TokenizerState.beforeDoctypeName);
},
211+
212+
/**
 * Skips the whitespace that separates the `DOCTYPE` keyword from the name.
 *
 * - whitespace: consumed, state unchanged;
 * - `>`: the doctype is closed without a name (`endDoctype` is reported if
 *   the delegate implements it) and the tokenizer returns to `beforeData`;
 * - anything else: first character of the name; it is lower-cased, reported
 *   through `appendToDoctypeName` (if implemented) and the tokenizer moves
 *   to `doctypeName`.
 */
beforeDoctypeName() {
  let char = this.consume();

  if (isSpace(char)) {
    return;
  } else if (char === '>') {
    // Without this branch `>` would be appended to the name and the doctype
    // would never be terminated for input such as `<!DOCTYPE >`.
    if (this.delegate.endDoctype) this.delegate.endDoctype();
    this.transitionTo(TokenizerState.beforeData);
  } else {
    this.transitionTo(TokenizerState.doctypeName);
    if (this.delegate.appendToDoctypeName) this.delegate.appendToDoctypeName(char.toLowerCase());
  }
},
222+
223+
/**
 * Accumulates the doctype name one character at a time.
 *
 * Whitespace ends the name (-> `afterDoctypeName`), `>` ends the whole
 * doctype (-> `beforeData`), and every other character is lower-cased and
 * handed to the delegate's optional `appendToDoctypeName`.
 */
doctypeName() {
  const next = this.consume();

  if (isSpace(next)) {
    this.transitionTo(TokenizerState.afterDoctypeName);
    return;
  }

  if (next === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
    return;
  }

  if (this.delegate.appendToDoctypeName) {
    this.delegate.appendToDoctypeName(next.toLowerCase());
  }
},
235+
236+
/**
 * Handles what follows the doctype name: more whitespace, the closing `>`,
 * or one of the `PUBLIC` / `SYSTEM` keywords (matched case-insensitively).
 *
 * Characters that start neither keyword are consumed and ignored.
 */
afterDoctypeName() {
  const first = this.consume();

  if (isSpace(first)) {
    return;
  }

  if (first === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
    return;
  }

  // Build a six-character candidate from the character just consumed plus
  // the five input characters that follow the current index.
  const lookahead = this.input.substring(this.index, this.index + 5);
  const keyword = first.toUpperCase() + lookahead.toUpperCase();

  const isPublic = keyword === 'PUBLIC';
  const isSystem = keyword === 'SYSTEM';

  if (!isPublic && !isSystem) {
    return;
  }

  // NOTE(review): the first keyword letter is already consumed above, yet
  // consume() is called six more times. Assuming consume() advances one
  // character per call, this skips the remaining five letters plus the
  // character immediately after the keyword. Kept as-is; confirm it is
  // intentional before changing the count.
  for (let i = 0; i < 6; i++) {
    this.consume();
  }

  this.transitionTo(
    isPublic ? TokenizerState.afterDoctypePublicKeyword : TokenizerState.afterDoctypeSystemKeyword
  );
},
266+
267+
/**
 * State entered after the `PUBLIC` keyword has been consumed.
 *
 * The next character is inspected with `peek()` and then consumed:
 * - whitespace -> `beforeDoctypePublicIdentifier`;
 * - `"` / `'`  -> the matching quoted public-identifier state;
 * - `>`        -> the doctype ends (`endDoctype` if implemented) and the
 *                 tokenizer returns to `beforeData`;
 * - anything else is consumed and ignored.
 */
afterDoctypePublicKeyword() {
  let char = this.peek();

  if (isSpace(char)) {
    this.transitionTo(TokenizerState.beforeDoctypePublicIdentifier);
    this.consume();
  } else if (char === '"') {
    this.transitionTo(TokenizerState.doctypePublicIdentifierDoubleQuoted);
    this.consume();
  } else if (char === "'") {
    this.transitionTo(TokenizerState.doctypePublicIdentifierSingleQuoted);
    this.consume();
  } else if (char === '>') {
    this.consume();
    if (this.delegate.endDoctype) this.delegate.endDoctype();
    this.transitionTo(TokenizerState.beforeData);
  } else {
    // Because this state only peeks, an unexpected character previously left
    // both the input position and the state untouched, so the handler could
    // never make progress. Skip the character, as the sibling states do.
    // NOTE(review): presumably the driver re-invokes the current state until
    // the input is exhausted, which would spin forever here - confirm.
    this.consume();
  }
},
285+
286+
/**
 * Reads the body of a double-quoted public identifier.
 *
 * The closing `"` moves to `afterDoctypePublicIdentifier`; a `>` ends the
 * doctype right away; every other character is passed unchanged to the
 * delegate's optional `appendToDoctypePublicIdentifier`.
 */
doctypePublicIdentifierDoubleQuoted() {
  const next = this.consume();

  switch (next) {
    case '"':
      this.transitionTo(TokenizerState.afterDoctypePublicIdentifier);
      break;
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    default:
      if (this.delegate.appendToDoctypePublicIdentifier) {
        this.delegate.appendToDoctypePublicIdentifier(next);
      }
  }
},
298+
299+
/**
 * Reads the body of a single-quoted public identifier.
 *
 * The closing `'` moves to `afterDoctypePublicIdentifier`; a `>` ends the
 * doctype right away; every other character is passed unchanged to the
 * delegate's optional `appendToDoctypePublicIdentifier`.
 */
doctypePublicIdentifierSingleQuoted() {
  const next = this.consume();

  switch (next) {
    case "'":
      this.transitionTo(TokenizerState.afterDoctypePublicIdentifier);
      break;
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    default:
      if (this.delegate.appendToDoctypePublicIdentifier) {
        this.delegate.appendToDoctypePublicIdentifier(next);
      }
  }
},
311+
312+
/**
 * State following the closing quote of the public identifier.
 *
 * Whitespace leads to `betweenDoctypePublicAndSystemIdentifiers`, `>` ends
 * the doctype, and an opening quote starts the system identifier in the
 * matching quoted state. Other characters are consumed and ignored.
 */
afterDoctypePublicIdentifier() {
  const next = this.consume();

  if (isSpace(next)) {
    this.transitionTo(TokenizerState.betweenDoctypePublicAndSystemIdentifiers);
    return;
  }

  switch (next) {
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    case '"':
      this.transitionTo(TokenizerState.doctypeSystemIdentifierDoubleQuoted);
      break;
    case "'":
      this.transitionTo(TokenizerState.doctypeSystemIdentifierSingleQuoted);
      break;
  }
},
326+
327+
/**
 * Skips whitespace between the public and the system identifier.
 *
 * `>` ends the doctype; an opening quote starts the system identifier in
 * the matching quoted state. Whitespace and all other characters are
 * consumed without changing state.
 */
betweenDoctypePublicAndSystemIdentifiers() {
  const next = this.consume();

  if (isSpace(next)) {
    return;
  }

  switch (next) {
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    case '"':
      this.transitionTo(TokenizerState.doctypeSystemIdentifierDoubleQuoted);
      break;
    case "'":
      this.transitionTo(TokenizerState.doctypeSystemIdentifierSingleQuoted);
      break;
  }
},
341+
342+
/**
 * Reads the body of a double-quoted system identifier.
 *
 * The closing `"` moves to `afterDoctypeSystemIdentifier`; a `>` ends the
 * doctype right away; every other character is passed unchanged to the
 * delegate's optional `appendToDoctypeSystemIdentifier`.
 */
doctypeSystemIdentifierDoubleQuoted() {
  const next = this.consume();

  switch (next) {
    case '"':
      this.transitionTo(TokenizerState.afterDoctypeSystemIdentifier);
      break;
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    default:
      if (this.delegate.appendToDoctypeSystemIdentifier) {
        this.delegate.appendToDoctypeSystemIdentifier(next);
      }
  }
},
354+
355+
/**
 * Reads the body of a single-quoted system identifier.
 *
 * The closing `'` moves to `afterDoctypeSystemIdentifier`; a `>` ends the
 * doctype right away; every other character is passed unchanged to the
 * delegate's optional `appendToDoctypeSystemIdentifier`.
 */
doctypeSystemIdentifierSingleQuoted() {
  const next = this.consume();

  switch (next) {
    case "'":
      this.transitionTo(TokenizerState.afterDoctypeSystemIdentifier);
      break;
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    default:
      if (this.delegate.appendToDoctypeSystemIdentifier) {
        this.delegate.appendToDoctypeSystemIdentifier(next);
      }
  }
},
367+
368+
/**
 * State following the closing quote of the system identifier.
 *
 * Only `>` has an effect: it ends the doctype (`endDoctype` if the delegate
 * implements it) and returns the tokenizer to `beforeData`. Whitespace and
 * any other character are consumed without changing state.
 */
afterDoctypeSystemIdentifier() {
  const next = this.consume();

  if (isSpace(next)) {
    return;
  }

  if (next === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
  }
},
190378

src/tokenizer.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,41 @@ export default class Tokenizer implements TokenizerDelegate {
9999

100100
// Data
101101

102+
/**
 * Delegate hook: pushes a fresh `Doctype` token whose name starts out empty.
 */
beginDoctype() {
  this.push({ type: TokenType.Doctype, name: '' });
}
108+
109+
/**
 * Delegate hook: appends one character to the current doctype token's name.
 *
 * @param char - text to add to the end of `name`.
 */
appendToDoctypeName(char: string) {
  const doctype = this.current(TokenType.Doctype);

  doctype.name = doctype.name + char;
}
112+
113+
/**
 * Delegate hook: extends the current doctype token's public identifier.
 *
 * The property is left `undefined` until the first character arrives, at
 * which point it is initialised with that character.
 *
 * @param char - text to add to the end of `publicIdentifier`.
 */
appendToDoctypePublicIdentifier(char: string) {
  const token = this.current(TokenType.Doctype);
  const existing = token.publicIdentifier;

  token.publicIdentifier = existing === undefined ? char : existing + char;
}
122+
123+
/**
 * Delegate hook: extends the current doctype token's system identifier.
 *
 * The property is left `undefined` until the first character arrives, at
 * which point it is initialised with that character.
 *
 * @param char - text to add to the end of `systemIdentifier`.
 */
appendToDoctypeSystemIdentifier(char: string) {
  const token = this.current(TokenType.Doctype);
  const existing = token.systemIdentifier;

  token.systemIdentifier = existing === undefined ? char : existing + char;
}
132+
133+
/**
 * Delegate hook invoked when a doctype is closed; delegates to `addLocInfo()`.
 */
// NOTE(review): presumably addLocInfo() stamps location data onto the
// current token - confirm against its definition.
endDoctype() {
  this.addLocInfo();
}
136+
102137
beginData() {
103138
this.push({
104139
type: TokenType.Chars,

src/types.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ export interface TokenBase<T extends TokenType> {
3030
loc?: Location;
3131
}
3232

33+
/**
 * Token describing a `<!DOCTYPE ...>` declaration.
 */
export interface Doctype extends TokenBase<TokenType.Doctype> {
  /** Doctype name; starts as an empty string and grows as characters are appended. */
  name: string;

  /** Text of the quoted public identifier; absent when no character was ever appended. */
  publicIdentifier?: string;

  /** Text of the quoted system identifier; absent when no character was ever appended. */
  systemIdentifier?: string;
}
38+
3339
export interface StartTag extends TokenBase<TokenType.StartTag> {
3440
tagName: string;
3541
attributes: Attribute[];
@@ -48,9 +54,10 @@ export interface Comment extends TokenBase<TokenType.Comment> {
4854
chars: string;
4955
}
5056

51-
export type Token = StartTag | EndTag | Chars | Comment;
57+
/** Union of every token shape the tokenizer can produce. */
export type Token =
  | StartTag
  | EndTag
  | Chars
  | Comment
  | Doctype;
5258

5359
export const enum TokenType {
60+
Doctype = 'Doctype',
5461
StartTag = 'StartTag',
5562
EndTag = 'EndTag',
5663
Chars = 'Chars',
@@ -62,13 +69,21 @@ export interface TokenMap {
6269
EndTag: EndTag;
6370
Chars: Chars;
6471
Comment: Comment;
72+
Doctype: Doctype;
6573
}
6674

6775
export interface TokenizerDelegate {
6876
reset(): void;
6977
finishData(): void;
7078
tagOpen(): void;
7179

80+
// TODO: make these non-optional in preparation for the next major version release
81+
beginDoctype?(): void;
82+
appendToDoctypeName?(char: string): void;
83+
appendToDoctypePublicIdentifier?(char: string): void;
84+
appendToDoctypeSystemIdentifier?(char: string): void;
85+
endDoctype?(): void;
86+
7287
beginData(): void;
7388
appendToData(char: string): void;
7489

0 commit comments

Comments
 (0)