Skip to content

Commit 40799e2

Browse files
committed
fix: refine newline handling in lexer and update test snapshots
1 parent 4380ad2 commit 40799e2

File tree

5 files changed

+69
-38
lines changed

5 files changed

+69
-38
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,5 +65,8 @@
6565
"jsonc",
6666
"yaml",
6767
"toml"
68+
],
69+
"cSpell.words": [
70+
"unplugin"
6871
]
6972
}

src/core/context/generator.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ export class Generator {
88
walk(node: SimpleNode): string | void {
99
switch (node.type) {
1010
case 'Program':
11-
return node.body.map(this.walk.bind(this)).filter((n: any) => !!n && n !== '\r\n').join('')
11+
return node.body.map(this.walk.bind(this)).filter((n: any) => !!n).join('')
1212
case 'CodeStatement':
1313
return node.value
1414
}

src/core/context/lexer.ts

Lines changed: 23 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -13,54 +13,48 @@ export class Lexer {
1313
scanner:
1414
while (this.current < code.length) {
1515
const startIndex = this.current
16-
// 查找最近的换行符(\r\n, \n, \r)
17-
const nextCR = code.indexOf('\r', startIndex)
18-
const nextLF = code.indexOf('\n', startIndex)
16+
17+
// 使用正则表达式匹配换行符,更优雅地处理各种换行符类型
18+
const newlineMatch = code.slice(startIndex).match(/(\r\n|\n|\r)/)
19+
1920
let endIndex: number
21+
let nextIndex: number
22+
let newlineChar: string | undefined
2023

21-
if (nextCR === -1 && nextLF === -1) {
22-
// 没有找到换行符,说明是最后一行
23-
endIndex = code.length
24-
}
25-
else if (nextCR === -1) {
26-
// 只有 \n
27-
endIndex = nextLF
28-
}
29-
else if (nextLF === -1) {
30-
// 只有 \r
31-
endIndex = nextCR
32-
}
33-
else if (nextCR < nextLF) {
34-
// 如果是 \r\n,跳过 \n
35-
endIndex = nextCR
36-
if (nextLF === nextCR + 1) {
37-
endIndex += 2
38-
}
24+
if (newlineMatch) {
25+
newlineChar = newlineMatch[0]
26+
endIndex = startIndex + newlineMatch.index!
27+
nextIndex = endIndex + newlineChar.length
3928
}
4029
else {
41-
// \n
42-
endIndex = nextLF
30+
// 没有找到换行符,说明是最后一行
31+
endIndex = code.length
32+
nextIndex = code.length
4333
}
4434

45-
const rawLine = code.slice(startIndex, endIndex)
46-
const line = rawLine.trim()
47-
if (isComment(line)) {
35+
// 获取原始行内容,对于 code 类型,我们需要包含换行符
36+
const rawLine = code.slice(startIndex, nextIndex)
37+
const lineWithoutNewline = code.slice(startIndex, endIndex).trim()
38+
39+
if (isComment(lineWithoutNewline)) {
4840
for (const lex of this.lexers) {
49-
const comment = parseComment(line)
41+
const comment = parseComment(lineWithoutNewline)
5042

5143
const token = lex.bind(this)(comment.content!)
5244
if (token) {
5345
this.tokens.push({ comment: comment.type, ...token })
54-
this.current = endIndex
46+
this.current = nextIndex
5547
continue scanner
5648
}
5749
}
5850
}
51+
52+
// 对于 code 类型,保留原始行内容(包括换行符)
5953
this.tokens.push({
6054
type: 'code',
6155
value: rawLine,
6256
} as CodeToken)
63-
this.current = endIndex
57+
this.current = nextIndex
6458
}
6559
return this.tokens
6660
}

test/__snapshots__/if.test.ts.snap

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,18 @@
33
exports[`if > should parse if.css, dev = false 1`] = `
44
"body {
55
}
6+
7+
68
body {
79
content: "!DEV";
810
}
11+
12+
913
body {
1014
content: "!DEV";
1115
}
16+
17+
1218
body {
1319
}
1420
"
@@ -18,44 +24,62 @@ exports[`if > should parse if.css, dev = true 1`] = `
1824
"body {
1925
content: "DEV";
2026
}
27+
28+
2129
body {
2230
content: "!DEV else";
2331
}
32+
33+
2434
body {
2535
content: "TEST";
2636
}
37+
38+
2739
body {
2840
content: "else";
2941
}
3042
"
3143
`;
3244

3345
exports[`if > should parse if.html, dev = false 1`] = `
34-
"<div>!DEV</div>
46+
"
3547
<div>!DEV</div>
48+
49+
<div>!DEV</div>
50+
3651
"
3752
`;
3853

3954
exports[`if > should parse if.html, dev = true 1`] = `
4055
"<div>DEV</div>
56+
4157
<div>!DEV else</div>
58+
4259
<div>TEST</div>
60+
4361
<div>
4462
<div>else</div>
4563
</div>
4664
"
4765
`;
4866

4967
exports[`if > should parse if.js, dev = false 1`] = `
50-
"console.log('!DEV')
68+
"
69+
console.log('!DEV')
70+
5171
console.log('!DEV')
72+
5273
"
5374
`;
5475

5576
exports[`if > should parse if.js, dev = true 1`] = `
5677
"console.log('DEV')
78+
5779
console.log('!DEV else')
80+
5881
console.log('TEST')
82+
5983
console.log('else')
6084
"
6185
`;
@@ -76,23 +100,31 @@ exports[`if > should parse if.jsx, dev = true 1`] = `
76100

77101
exports[`if > should parse if.vue, dev = false 1`] = `
78102
"<script setup lang="ts">
103+
79104
</script>
105+
80106
<template>
81107
<pre>{
82108
}</pre>
83109
</template>
110+
84111
<style>
112+
85113
</style>"
86114
`;
87115

88116
exports[`if > should parse if.vue, dev = true 1`] = `
89117
"<script setup lang="ts">
118+
90119
</script>
120+
91121
<template>
92122
<pre>{
93123
"data": 'value' // comment
94124
}</pre>
95125
</template>
126+
96127
<style>
128+
97129
</style>"
98130
`;

test/lexer.test.ts

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,11 @@ describe('lexer', () => {
99
// Another comment
1010
const baz = 'qux';`
1111
const expectedTokens = [
12-
{ type: 'code', value: '// This is a comment' },
13-
{ type: 'code', value: 'const foo = \'bar\';' },
14-
{ type: 'code', value: '// Another comment' },
15-
{ type: 'code', value: 'const baz = \'qux\';' },
12+
{ type: 'code', value: '\n' },
13+
{ type: 'code', value: ' // This is a comment\n' },
14+
{ type: 'code', value: ' const foo = \'bar\';\n' },
15+
{ type: 'code', value: ' // Another comment\n' },
16+
{ type: 'code', value: ' const baz = \'qux\';' },
1617
]
1718

1819
const tokens = Lexer.lex(code)
@@ -25,8 +26,9 @@ describe('lexer', () => {
2526
const foo = 'bar';
2627
const baz = 'qux';`
2728
const expectedTokens = [
28-
{ type: 'code', value: 'const foo = \'bar\';' },
29-
{ type: 'code', value: 'const baz = \'qux\';' },
29+
{ type: 'code', value: '\n' },
30+
{ type: 'code', value: ' const foo = \'bar\';\n' },
31+
{ type: 'code', value: ' const baz = \'qux\';' },
3032
]
3133

3234
const tokens = Lexer.lex(code)

0 commit comments

Comments
 (0)