Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,37 @@ func unescapeIdent(s string) string {
}
}

// unescapeUTFSymbols replaces every \x, \X, \u and \U escape sequence in s
// with the UTF-8 encoding of the rune it denotes.
//
// Any other backslash escape is copied to the output unchanged together with
// the character it escapes, so an escaped backslash is never mistaken for the
// start of a new escape sequence (`\\x41` stays `\\x41`). A sequence that
// decodeUTFEscapeSequence cannot decode is copied unchanged as well.
// If s contains no backslash, s itself is returned.
func unescapeUTFSymbols(s string) string {
	n := strings.IndexByte(s, '\\')
	if n < 0 {
		return s
	}
	dst := make([]byte, 0, len(s))
	for n >= 0 {
		// Copy the text preceding the backslash, then step over the backslash.
		dst = append(dst, s[:n]...)
		s = s[n+1:]

		decoded := false
		if isUTFEscapePrefix(s) {
			if r, size := decodeUTFEscapeSequence(s); r != utf8.RuneError {
				dst = utf8.AppendRune(dst, r)
				s = s[size:]
				decoded = true
			}
		}
		if !decoded {
			// Keep the escape as is. The escaped byte is consumed along with
			// its backslash; otherwise the second backslash of `\\` would be
			// treated as the start of the next escape sequence.
			dst = append(dst, '\\')
			if len(s) > 0 {
				dst = append(dst, s[0])
				s = s[1:]
			}
		}

		n = strings.IndexByte(s, '\\')
	}
	dst = append(dst, s...)
	return string(dst)
}

func appendEscapedIdent(dst []byte, s string) []byte {
i := 0
for i < len(s) {
Expand Down Expand Up @@ -728,7 +759,33 @@ func appendEscapeSequence(dst []byte, r rune) []byte {
return append(dst, 'u', toHex(byte(r>>12)), toHex(byte((r>>8)&0xf)), toHex(byte(r>>4)), toHex(byte(r&0xf)))
}

func decodeEscapeSequence(s string) (rune, int) {
// hasUTFEscapedSymbols reports whether s contains at least one escape
// sequence introduced by \x, \X, \u or \U.
//
// The character following a backslash is consumed together with it, so an
// escaped backslash followed by one of these letters (for example `\\x41`)
// is not reported as a UTF escape sequence.
func hasUTFEscapedSymbols(s string) bool {
	for i := 0; i+1 < len(s); i++ {
		if s[i] != '\\' {
			continue
		}
		switch s[i+1] {
		case 'x', 'X', 'u', 'U':
			return true
		}
		// Skip the escaped character so that the second backslash of `\\`
		// is not taken for the start of another escape sequence.
		i++
	}
	return false
}

// isUTFEscapePrefix reports whether the first byte of s is one of the
// letters x, X, u or U. It returns false for an empty string.
func isUTFEscapePrefix(s string) bool {
	if s == "" {
		return false
	}
	c := s[0]
	return c == 'x' || c == 'X' || c == 'u' || c == 'U'
}

func decodeUTFEscapeSequence(s string) (rune, int) {
if strings.HasPrefix(s, "x") || strings.HasPrefix(s, "X") {
if len(s) >= 3 {
h1 := fromHex(s[1])
Expand All @@ -752,6 +809,15 @@ func decodeEscapeSequence(s string) (rune, int) {
}
return utf8.RuneError, 0
}
// Improperly escaped non-printable char
return utf8.RuneError, 0
}

func decodeEscapeSequence(s string) (rune, int) {
if isUTFEscapePrefix(s) {
return decodeUTFEscapeSequence(s)
}

r, size := utf8.DecodeRuneInString(s)
if unicode.IsPrint(r) {
return r, size
Expand Down
3 changes: 3 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,9 @@ func extractStringValue(token string) (string, error) {
token = strings.Replace(token, `"`, `\"`, -1)
token = `"` + token + `"`
}
if hasUTFEscapedSymbols(token) {
token = unescapeUTFSymbols(token)
}
s, err := strconv.Unquote(token)
if err != nil {
return "", fmt.Errorf(`cannot parse string literal %q: %s`, token, err)
Expand Down
6 changes: 6 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ func TestParseSuccess(t *testing.T) {
another(`\温\度{\房\间="水电费"}[5m] offset 10m`, `温度{房间="水电费"}[5m] offset 10m`)
same(`sum(fo\|o) by(b\|a,x)`)
another(`sum(x) by (b\x7Ca)`, `sum(x) by(b\|a)`)
another(`fo\xF3`, `foó`)
another(`fo\u00F3`, `foó`)
another(`{__name__="fo\xF3"}`, `foó`)
another(`{__name__="fo\xF3"}`, `foó`)
another(`"\n\tfo\xF3"`, `"\n\tfoó"`)
another(`温度{房间="水电费\xF3"}[5m] offset 10m`, `温度{房间="水电费ó"}[5m] offset 10m`)

// Duplicate filters
same(`foo{__name__="bar"}`)
Expand Down