Skip to content

Commit 98b0990

Browse files
authored
Improve parser and lexer (#811)
* convert source to a struct and fix minor issue displaying error
* lexer: add byte and char pos
* improve parser to use string instead of []rune
* add lexer benchmarks
* make lexer work as an iterator
* make parser use the new iterator API in lexer
* allow reusing the parser
* cleanup code
* add parser benchmarks
1 parent eeb1b8b commit 98b0990

File tree

12 files changed

+529
-243
lines changed

12 files changed

+529
-243
lines changed

file/error.go

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package file
33
import (
44
"fmt"
55
"strings"
6-
"unicode/utf8"
76
)
87

98
type Error struct {
@@ -19,43 +18,47 @@ func (e *Error) Error() string {
1918
return e.format()
2019
}
2120

21+
var tabReplacer = strings.NewReplacer("\t", " ")
22+
2223
func (e *Error) Bind(source Source) *Error {
24+
src := source.String()
25+
26+
var runeCount, lineStart int
2327
e.Line = 1
24-
for i, r := range source {
25-
if i == e.From {
28+
e.Column = 0
29+
for i, r := range src {
30+
if runeCount == e.From {
2631
break
2732
}
2833
if r == '\n' {
34+
lineStart = i
2935
e.Line++
3036
e.Column = 0
31-
} else {
32-
e.Column++
3337
}
38+
runeCount++
39+
e.Column++
40+
}
41+
42+
lineEnd := lineStart + strings.IndexByte(src[lineStart:], '\n')
43+
if lineEnd < lineStart {
44+
lineEnd = len(src)
45+
}
46+
if lineStart == lineEnd {
47+
return e
3448
}
35-
if snippet, found := source.Snippet(e.Line); found {
36-
snippet := strings.Replace(snippet, "\t", " ", -1)
37-
srcLine := "\n | " + snippet
38-
var bytes = []byte(snippet)
39-
var indLine = "\n | "
40-
for i := 0; i < e.Column && len(bytes) > 0; i++ {
41-
_, sz := utf8.DecodeRune(bytes)
42-
bytes = bytes[sz:]
43-
if sz > 1 {
44-
goto noind
45-
} else {
46-
indLine += "."
47-
}
48-
}
49-
if _, sz := utf8.DecodeRune(bytes); sz > 1 {
50-
goto noind
51-
} else {
52-
indLine += "^"
53-
}
54-
srcLine += indLine
5549

56-
noind:
57-
e.Snippet = srcLine
50+
const prefix = "\n | "
51+
line := src[lineStart:lineEnd]
52+
snippet := new(strings.Builder)
53+
snippet.Grow(2*len(prefix) + len(line) + e.Column + 1)
54+
snippet.WriteString(prefix)
55+
tabReplacer.WriteString(snippet, line)
56+
snippet.WriteString(prefix)
57+
for i := 0; i < e.Column; i++ {
58+
snippet.WriteByte('.')
5859
}
60+
snippet.WriteByte('^')
61+
e.Snippet = snippet.String()
5962
return e
6063
}
6164

file/source.go

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,36 @@
11
package file
22

3-
import (
4-
"strings"
5-
"unicode/utf8"
6-
)
3+
import "strings"
74

8-
type Source []rune
5+
// Source holds the text of a program as a plain string.
type Source struct {
	raw string
}

// NewSource returns a Source wrapping contents.
func NewSource(contents string) Source {
	return Source{
		raw: contents,
	}
}

// String returns the source text exactly as it was given to NewSource.
func (s Source) String() string {
	return s.raw
}

// Snippet returns the text of the given line (1-based), without its trailing
// newline. The boolean is false when the source is empty, when line is less
// than 1, or when the source has fewer than line lines. A source that ends
// with a newline has a final, empty line that is reported as found.
func (s Source) Snippet(line int) (string, bool) {
	if s.raw == "" || line < 1 {
		return "", false
	}
	// Skip line-1 newlines to find the byte offset where the line starts.
	var start int
	for i := 1; i < line; i++ {
		pos := strings.IndexByte(s.raw[start:], '\n')
		if pos < 0 {
			return "", false
		}
		start += pos + 1
	}
	// IndexByte returns -1 on the last line, which makes end smaller than
	// start: the line then runs to the end of the source.
	end := start + strings.IndexByte(s.raw[start:], '\n')
	if end < start {
		end = len(s.raw)
	}
	return s.raw[start:end], true
}

internal/ring/ring.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package ring
2+
3+
// Ring is a minimal FIFO ring buffer stored in a slice. The backing slice is
// never shrunk; whenever it is full it is extended by "chunkSize" elements
// (the value passed to [New]). Slots are zeroed when items leave the ring, so
// it is safe to store pointers and other reference types in it.
type Ring[T any] struct {
	data                 []T
	back, len, chunkSize int
}

// New creates an empty ring that grows by chunkSize elements at a time. It
// panics if chunkSize is not positive.
func New[T any](chunkSize int) *Ring[T] {
	if chunkSize <= 0 {
		panic("chunkSize must be greater than zero")
	}
	r := new(Ring[T])
	r.chunkSize = chunkSize
	return r
}

// Len reports how many items are currently stored in the ring.
func (r *Ring[T]) Len() int {
	return r.len
}

// Cap reports the size of the backing slice.
func (r *Ring[T]) Cap() int {
	return len(r.data)
}

// Reset empties the ring, zeroing every slot while keeping the backing slice.
func (r *Ring[T]) Reset() {
	var zero T
	// Plain zeroing loop; the "clear" builtin would do the same from Go 1.21.
	for i := 0; i < len(r.data); i++ {
		r.data[i] = zero
	}
	r.back, r.len = 0, 0
}

// Nth returns the n-th oldest value (zero-based) without modifying the ring.
// ok is false when n is out of range.
func (r *Ring[T]) Nth(n int) (v T, ok bool) {
	if len(r.data) > 0 && n >= 0 && n < r.len {
		return r.data[(r.back+n)%len(r.data)], true
	}
	return v, false
}

// Dequeue removes and returns the oldest value. ok is false when the ring is
// empty.
func (r *Ring[T]) Dequeue() (v T, ok bool) {
	if r.len == 0 {
		return v, false
	}
	var zero T
	oldest := r.back
	v = r.data[oldest]
	r.data[oldest] = zero // do not keep the removed item reachable
	r.back = (oldest + 1) % len(r.data)
	r.len--
	return v, true
}

// Enqueue appends an item to the ring, growing the backing slice if it is
// full.
func (r *Ring[T]) Enqueue(v T) {
	if r.len == len(r.data) {
		r.grow()
	}
	r.data[(r.back+r.len)%len(r.data)] = v
	r.len++
}

// grow replaces the backing slice with one that is chunkSize elements larger,
// moving the stored items to its start in oldest-first order.
func (r *Ring[T]) grow() {
	grown := make([]T, len(r.data)+r.chunkSize)
	if r.len > 0 {
		// First segment: from the oldest item up to the end of the old slice
		// (or up to the newest item when the contents do not wrap).
		tail := r.back + r.len
		if tail > len(r.data) {
			tail = len(r.data)
		}
		n := copy(grown, r.data[r.back:tail])

		// Second segment: whatever wrapped around to the start.
		if rest := r.len - n; rest > 0 {
			copy(grown[n:], r.data[:rest])
		}
	}
	r.data = grown
	r.back = 0
}

internal/ring/ring_test.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package ring
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
)
7+
8+
func TestRing(t *testing.T) {
9+
type op = ringOp[int]
10+
testRing(t, New[int](3),
11+
// noops on empty ring
12+
op{cap: 0, opType: opRst, value: 0, items: []int{}},
13+
op{cap: 0, opType: opDeq, value: 0, items: []int{}},
14+
15+
// basic
16+
op{cap: 3, opType: opEnq, value: 1, items: []int{1}},
17+
op{cap: 3, opType: opDeq, value: 1, items: []int{}},
18+
19+
// wrapping
20+
op{cap: 3, opType: opEnq, value: 2, items: []int{2}},
21+
op{cap: 3, opType: opEnq, value: 3, items: []int{2, 3}},
22+
op{cap: 3, opType: opEnq, value: 4, items: []int{2, 3, 4}},
23+
op{cap: 3, opType: opDeq, value: 2, items: []int{3, 4}},
24+
op{cap: 3, opType: opDeq, value: 3, items: []int{4}},
25+
op{cap: 3, opType: opDeq, value: 4, items: []int{}},
26+
27+
// resetting
28+
op{cap: 3, opType: opEnq, value: 2, items: []int{2}},
29+
op{cap: 3, opType: opRst, value: 0, items: []int{}},
30+
op{cap: 3, opType: opDeq, value: 0, items: []int{}},
31+
32+
// growing without wrapping
33+
op{cap: 3, opType: opEnq, value: 5, items: []int{5}},
34+
op{cap: 3, opType: opEnq, value: 6, items: []int{5, 6}},
35+
op{cap: 3, opType: opEnq, value: 7, items: []int{5, 6, 7}},
36+
op{cap: 6, opType: opEnq, value: 8, items: []int{5, 6, 7, 8}},
37+
op{cap: 6, opType: opRst, value: 0, items: []int{}},
38+
op{cap: 6, opType: opDeq, value: 0, items: []int{}},
39+
40+
// growing and wrapping
41+
op{cap: 6, opType: opEnq, value: 9, items: []int{9}},
42+
op{cap: 6, opType: opEnq, value: 10, items: []int{9, 10}},
43+
op{cap: 6, opType: opEnq, value: 11, items: []int{9, 10, 11}},
44+
op{cap: 6, opType: opEnq, value: 12, items: []int{9, 10, 11, 12}},
45+
op{cap: 6, opType: opEnq, value: 13, items: []int{9, 10, 11, 12, 13}},
46+
op{cap: 6, opType: opEnq, value: 14, items: []int{9, 10, 11, 12, 13, 14}},
47+
op{cap: 6, opType: opDeq, value: 9, items: []int{10, 11, 12, 13, 14}},
48+
op{cap: 6, opType: opDeq, value: 10, items: []int{11, 12, 13, 14}},
49+
op{cap: 6, opType: opEnq, value: 15, items: []int{11, 12, 13, 14, 15}},
50+
op{cap: 6, opType: opEnq, value: 16, items: []int{11, 12, 13, 14, 15, 16}},
51+
op{cap: 9, opType: opEnq, value: 17, items: []int{11, 12, 13, 14, 15, 16, 17}}, // grows wrapped
52+
op{cap: 9, opType: opDeq, value: 11, items: []int{12, 13, 14, 15, 16, 17}},
53+
op{cap: 9, opType: opDeq, value: 12, items: []int{13, 14, 15, 16, 17}},
54+
op{cap: 9, opType: opDeq, value: 13, items: []int{14, 15, 16, 17}},
55+
op{cap: 9, opType: opDeq, value: 14, items: []int{15, 16, 17}},
56+
op{cap: 9, opType: opDeq, value: 15, items: []int{16, 17}},
57+
op{cap: 9, opType: opDeq, value: 16, items: []int{17}},
58+
op{cap: 9, opType: opDeq, value: 17, items: []int{}},
59+
op{cap: 9, opType: opDeq, value: 0, items: []int{}},
60+
)
61+
62+
t.Run("should panic on invalid chunkSize", func(t *testing.T) {
63+
defer func() {
64+
if r := recover(); r == nil {
65+
t.Fatalf("should have panicked")
66+
}
67+
}()
68+
New[int](0)
69+
})
70+
}
71+
72+
// Operation kinds understood by testRingOp (stored in ringOp.opType).
const (
73+
opEnq = iota // enqueue ringOp.value
74+
opDeq // dequeue; the result is compared with ringOp.value only if the ring was not empty
75+
opRst // reset the ring
76+
)
77+
78+
// ringOp describes one scripted step of a ring test: the operation to apply
// and the state the ring is expected to be in afterwards.
type ringOp[T comparable] struct {
79+
cap int // expected Cap() after the operation
80+
opType int // opEnq, opDeq or opRst
81+
value T // value to enqueue or value expected for dequeue; ignored for opRst
82+
items []T // items expected to be left in the ring, oldest first
83+
}
84+
85+
func testRing[T comparable](t *testing.T, r *Ring[T], ops ...ringOp[T]) {
86+
for i, op := range ops {
87+
testOK := t.Run(fmt.Sprintf("opIndex=%v", i), func(t *testing.T) {
88+
testRingOp(t, r, op)
89+
})
90+
if !testOK {
91+
return
92+
}
93+
}
94+
}
95+
96+
func testRingOp[T comparable](t *testing.T, r *Ring[T], op ringOp[T]) {
97+
var zero T
98+
switch op.opType {
99+
case opEnq:
100+
r.Enqueue(op.value)
101+
case opDeq:
102+
shouldSucceed := r.Len() > 0
103+
v, ok := r.Dequeue()
104+
switch {
105+
case ok != shouldSucceed:
106+
t.Fatalf("should have succeeded: %v", shouldSucceed)
107+
case ok && v != op.value:
108+
t.Fatalf("expected value: %v; got: %v", op.value, v)
109+
case !ok && v != zero:
110+
t.Fatalf("expected zero value; got: %v", v)
111+
}
112+
case opRst:
113+
r.Reset()
114+
}
115+
if c := r.Cap(); c != op.cap {
116+
t.Fatalf("expected cap: %v; got: %v", op.cap, c)
117+
}
118+
if l := r.Len(); l != len(op.items) {
119+
t.Errorf("expected Len(): %v; got: %v", len(op.items), l)
120+
}
121+
var got []T
122+
for i := 0; ; i++ {
123+
v, ok := r.Nth(i)
124+
if !ok {
125+
break
126+
}
127+
got = append(got, v)
128+
}
129+
if l := len(got); l != len(op.items) {
130+
t.Errorf("expected items: %v\ngot items: %v", op.items, got)
131+
}
132+
for i := range op.items {
133+
if op.items[i] != got[i] {
134+
t.Fatalf("expected items: %v\ngot items: %v", op.items, got)
135+
}
136+
}
137+
if v, ok := r.Nth(len(op.items)); ok || v != zero {
138+
t.Fatalf("expected no more items, got: v=%v; ok=%v", v, ok)
139+
}
140+
}

parser/bench_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package parser
2+
3+
import "testing"
4+
5+
// BenchmarkParser measures repeated parsing of one small input that mixes a
// block comment, a string literal with escape sequences and non-ASCII text,
// an inline comment, a hexadecimal literal and a trailing token that the
// input's own comment describes as an intentional error. Allocations are
// reported, and the result of Parse is deliberately ignored.
func BenchmarkParser(b *testing.B) {
6+
const source = `
7+
/*
8+
Showing worst case scenario
9+
*/
10+
let value = trim("contains escapes \n\"\\ \U0001F600 and non ASCII ñ"); // inline comment
11+
len(value) == 0x2A
12+
// let's introduce an error too
13+
whatever
14+
`
15+
b.ReportAllocs()
16+
// A single Parser is created outside the loop and reused by every iteration.
p := new(Parser)
17+
for i := 0; i < b.N; i++ {
18+
p.Parse(source, nil)
19+
}
20+
}

0 commit comments

Comments
 (0)