Skip to content

Commit b0c255f

Browse files
authored
Merge pull request #188 from wader/binary-regexp-explode-clenaup
interp: Cleanup binary regexp overloading and add explode
2 parents c298ed7 + dff7e7d commit b0c255f

File tree

9 files changed

+362
-312
lines changed

9 files changed

+362
-312
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
},
172172
"[jq]": {
173173
"editor.tabSize": 2,
174+
"files.trimTrailingWhitespace": true
174175
},
175176
"fracturedjsonvsc.MaxInlineLength": 160,
176177
}

doc/usage.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,8 @@ unary uses input and if more than one argument all as arguments ignoring the inp
382382
- For `capture` the `.string` value is a binary.
383383
- If pattern is a binary it will be matched literally and not as a regexp.
384384
- If pattern is a binary or flags include "b" each input byte will be read as separate code points
385-
- `scan_toend($v)`, `scan_toend($v; $flags)` works the same as `scan` but output binary are from start of match to
385+
- String functions are not overloaded to support binary for now, as some of them have behaviours that might be confusing.
386+
- `explode` is overloaded to work with binary. It will explode into an array of the units of the binary.
386387
end of binary.
387388
instead of possibly multi-byte UTF-8 codepoints. This allows to match raw bytes. Ex: `match("\u00ff"; "b")`
388389
will match the byte `0xff` and not the UTF-8 encoded codepoint for 255, `match("[^\u00ff]"; "b")` will match

internal/difftest/difftest.go

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
// Package difftest implement test based on serialized string output
1+
// Package difftest implements tests based on diffing serialized string output
22
//
3-
// User provides a function that gets a input string. It returns a output string
4-
// based on the input somehow and a output path to file with content to compare it
5-
// to or to write to if in write mode.
6-
// If there is a difference test will fail with a diff.
3+
// User provides a function that gets an input path and input string and returns an
4+
// output path and output string. Content of output path and output string is compared
5+
// and if there is a difference the test fails with a diff.
76
//
87
// Test inputs are read from files matching Pattern from Path.
98
//
@@ -31,16 +30,6 @@ const green = "\x1b[32m"
3130
const red = "\x1b[31m"
3231
const reset = "\x1b[0m"
3332

34-
type Fn func(t *testing.T, path string, input string) (string, string, error)
35-
36-
type Options struct {
37-
Path string
38-
Pattern string
39-
ColorDiff bool
40-
WriteOutput bool
41-
Fn Fn
42-
}
43-
4433
func testDeepEqual(t tf, color bool, printfFn func(format string, args ...interface{}), expected string, actual string) {
4534
t.Helper()
4635

@@ -111,6 +100,16 @@ func Fatal(t tf, expected string, actual string) {
111100
testDeepEqual(t, false, t.Fatalf, expected, actual)
112101
}
113102

103+
type Fn func(t *testing.T, path string, input string) (string, string, error)
104+
105+
type Options struct {
106+
Path string
107+
Pattern string
108+
ColorDiff bool
109+
WriteOutput bool
110+
Fn Fn
111+
}
112+
114113
func TestWithOptions(t *testing.T, opts Options) {
115114
t.Helper()
116115

pkg/interp/binary.jq

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,94 @@ def tobytesrange: _tobits(8; true; 0);
55
def tobits($pad): _tobits(1; false; $pad);
66
def tobytes($pad): _tobits(8; false; $pad);
77
def tobitsrange($pad): _tobits(1; true; $pad);
8-
def tobytesrange($pad): _tobits(8; true; $pad);
8+
def tobytesrange($pad): _tobits(8; true; $pad);
9+
10+
# same as regexp.QuoteMeta
11+
def _re_quote_meta:
12+
gsub("(?<c>[\\.\\+\\*\\?\\(\\)\\|\\[\\]\\{\\}\\^\\$\\)])"; "\\\(.c)");
13+
14+
# TODO:
15+
# maybe implode, join. but what would it mean?
16+
# "abc" | tobits | explode | implode would not work
17+
18+
# helper for overloading regex/string functions to support binary
19+
def _binary_or_orig(bfn; fn):
20+
( _exttype as $exttype
21+
| if . == null or $exttype == "string" then fn
22+
elif $exttype == "binary" then bfn
23+
else
24+
( . as $s
25+
| try
26+
(tobytesrange | bfn)
27+
catch ($s | fn)
28+
)
29+
end
30+
);
31+
32+
def _orig_explode: explode;
33+
def explode: _binary_or_orig([.[range(.size)]]; _orig_explode);
34+
35+
def _orig_splits($val): splits($val);
36+
def _orig_splits($regex; $flags): splits($regex; $flags);
37+
def _splits_binary($regex; $flags):
38+
( . as $b
39+
# the last null output is there to do a final iteration that outputs from the end of the last match to the end of the binary
40+
| foreach (_match_binary($regex; $flags), null) as $m (
41+
{prev: null, curr: null};
42+
( .prev = .curr
43+
| .curr = $m
44+
);
45+
if .prev == null then $b[0:.curr.offset]
46+
elif .curr == null then $b[.prev.offset+.prev.length:]
47+
else $b[.prev.offset+.prev.length:.curr.offset+.curr.length]
48+
end
49+
)
50+
);
51+
def splits($val): _binary_or_orig(_splits_binary($val; "g"); _orig_splits($val));
52+
def splits($regex; $flags): _binary_or_orig(_splits_binary($regex; "g"+$flags); _orig_splits($regex; $flags));
53+
54+
def _orig_split($val): split($val);
55+
def _orig_split($regex; $flags): split($regex; $flags);
56+
# split/1 splits on string not regexp
57+
def split($val): [splits($val | _re_quote_meta)];
58+
def split($regex; $flags): [splits($regex; $flags)];
59+
60+
def _orig_test($val): test($val);
61+
def _orig_test($regex; $flags): test($regex; $flags);
62+
def _test_binary($regex; $flags):
63+
( isempty(_match_binary($regex; $flags))
64+
| not
65+
);
66+
def test($val): _binary_or_orig(_test_binary($val; ""); _orig_test($val));
67+
def test($regex; $flags): _binary_or_orig(_test_binary($regex; $flags); _orig_test($regex; $flags));
68+
69+
def _orig_match($val): match($val);
70+
def _orig_match($regex; $flags): match($regex; $flags);
71+
def match($val): _binary_or_orig(_match_binary($val); _orig_match($val));
72+
def match($regex; $flags): _binary_or_orig(_match_binary($regex; $flags); _orig_match($regex; $flags));
73+
74+
def _orig_capture($val): capture($val);
75+
def _orig_capture($regex; $flags): capture($regex; $flags);
76+
def _capture_binary($regex; $flags):
77+
( . as $b
78+
| _match_binary($regex; $flags)
79+
| .captures
80+
| map(
81+
( select(.name)
82+
| {key: .name, value: .string}
83+
)
84+
)
85+
| from_entries
86+
);
87+
def capture($val): _binary_or_orig(_capture_binary($val; ""); _orig_capture($val));
88+
def capture($regex; $flags): _binary_or_orig(_capture_binary($regex; $flags); _orig_capture($regex; $flags));
89+
90+
def _orig_scan($val): scan($val);
91+
def _orig_scan($regex; $flags): scan($regex; $flags);
92+
def _scan_binary($regex; $flags):
93+
( . as $b
94+
| _match_binary($regex; $flags)
95+
| $b[.offset:.offset+.length]
96+
);
97+
def scan($val): _binary_or_orig(_scan_binary($val; "g"); _orig_scan($val));
98+
def scan($regex; $flags): _binary_or_orig(_scan_binary($regex; "g"+$flags); _orig_scan($regex; $flags));

pkg/interp/interp.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ import (
4141
//go:embed ansi.jq
4242
//go:embed binary.jq
4343
//go:embed decode.jq
44-
//go:embed match.jq
4544
//go:embed funcs.jq
4645
//go:embed grep.jq
4746
//go:embed args.jq

pkg/interp/interp.jq

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ include "internal";
22
include "options";
33
include "binary";
44
include "decode";
5-
include "match";
65
include "funcs";
76
include "grep";
87
include "args";

pkg/interp/match.jq

Lines changed: 0 additions & 99 deletions
This file was deleted.

0 commit comments

Comments
 (0)