Skip to content

Commit 7295086

Browse files
authored
Merge pull request #123 from no-context/unicode-flags
Allow unicode flag if all RegExps use it
2 parents 6d6bfa4 + 02c064f commit 7295086

File tree

2 files changed

+47
-7
lines changed

2 files changed

+47
-7
lines changed

moo.js

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
if (obj.global) throw new Error('RegExp /g flag is implied')
4747
if (obj.sticky) throw new Error('RegExp /y flag is implied')
4848
if (obj.multiline) throw new Error('RegExp /m flag is implied')
49-
if (obj.unicode) throw new Error('RegExp /u flag is not allowed')
5049
return obj.source
5150

5251
} else {
@@ -154,6 +153,7 @@
154153
var errorRule = null
155154
var fast = Object.create(null)
156155
var fastAllowed = true
156+
var unicodeFlag = null
157157
var groups = []
158158
var parts = []
159159

@@ -210,6 +210,20 @@
210210

211211
groups.push(options)
212212

213+
// Check unicode flag is used everywhere or nowhere
214+
for (var j = 0; j < match.length; j++) {
215+
var obj = match[j]
216+
if (!isRegExp(obj)) {
217+
continue
218+
}
219+
220+
if (unicodeFlag === null) {
221+
unicodeFlag = obj.unicode
222+
} else if (unicodeFlag !== obj.unicode) {
223+
throw new Error("If one rule is /u then all must be")
224+
}
225+
}
226+
213227
// convert to RegExp
214228
var pat = reUnion(match.map(regexpOrLiteral))
215229

@@ -241,8 +255,9 @@
241255
var fallbackRule = errorRule && errorRule.fallback
242256
var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
243257
var suffix = hasSticky || fallbackRule ? '' : '|'
244-
var combined = new RegExp(reUnion(parts) + suffix, flags)
245258

259+
if (unicodeFlag === true) flags += "u"
260+
var combined = new RegExp(reUnion(parts) + suffix, flags)
246261
return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
247262
}
248263

test/test.js

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
const fs = require('fs')
32
const vm = require('vm')
43

@@ -29,17 +28,14 @@ describe('compiler', () => {
2928
expect(lex4.next()).toMatchObject({type: 'err', text: 'nope!'})
3029
})
3130

32-
test("warns for /g, /y, /i, /m, /u", () => {
31+
test("warns for /g, /y, /i, /m", () => {
3332
expect(() => compile({ word: /foo/ })).not.toThrow()
3433
expect(() => compile({ word: /foo/g })).toThrow('implied')
3534
expect(() => compile({ word: /foo/i })).toThrow('not allowed')
3635
expect(() => compile({ word: /foo/y })).toThrow('implied')
3736
expect(() => compile({ word: /foo/m })).toThrow('implied')
38-
expect(() => compile({ word: /foo/u })).toThrow('not allowed')
3937
})
4038

41-
// TODO warns if no lineBreaks: true
42-
4339
test('warns about missing states', () => {
4440
const rules = [
4541
{match: '=', next: 'missing'},
@@ -1186,3 +1182,32 @@ describe('include', () => {
11861182
])
11871183
})
11881184
})
1185+
1186+
1187+
// Tests for the RegExp /u (unicode) flag: it is accepted only when every
// RegExp rule in a lexer carries it, and it must make the compiled lexer
// match whole code points rather than individual UTF-16 code units.
describe("unicode flag", () => {

  test("allows all rules to be /u", () => {
    // String-literal rules ("quxx") are not RegExps, so they may be mixed
    // freely with /u rules.
    expect(() => compile({ a: /foo/u, b: /bar/u, c: "quxx" })).not.toThrow()
    // Mixing /u and non-/u RegExps is rejected regardless of which comes first.
    expect(() => compile({ a: /foo/u, b: /bar/, c: "quxx" })).toThrow("If one rule is /u then all must be")
    expect(() => compile({ a: /foo/, b: /bar/u, c: "quxx" })).toThrow("If one rule is /u then all must be")
  })

  test("supports unicode", () => {
    const lexer = compile({
      a: /[𝌆]/u,
    })
    lexer.reset("𝌆")
    expect(lexer.next()).toMatchObject({value: "𝌆"})
    // Feed only the leading surrogate of the astral character, as a string.
    // With /u the character class contains the single code point U+1D306, so
    // half of the surrogate pair must not match. (Without /u the class would
    // hold both surrogates and this input would be accepted.) Passing the
    // numeric char code instead would fail for reasons unrelated to /u.
    lexer.reset("𝌆".charAt(0))
    expect(() => lexer.next()).toThrow()

    const lexer2 = compile({
      a: /\u{1D356}/u,
    })
    lexer2.reset("𝍖")
    expect(lexer2.next()).toMatchObject({value: "𝍖"})
    // The nine-character text `\u{1D356}` is not the code point it spells out.
    lexer2.reset("\\u{1D356}")
    expect(() => lexer2.next()).toThrow()
  })

})

0 commit comments

Comments
 (0)