Skip to content

Commit affe439

Browse files
0AyanamiReiYour Name
authored andcommitted
[improvement](json load) Compatible with "$" and "$." that appear during json load (#56703)
For JSON-related parameters, both `$` and `$.` are supported for `json_root` in json load
1 parent 13d3988 commit affe439

File tree

4 files changed

+209
-3
lines changed

4 files changed

+209
-3
lines changed

be/src/vec/exec/format/json/new_json_reader.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -452,8 +452,13 @@ Status NewJsonReader::_parse_jsonpath_and_json_root() {
452452
if (!path.IsString()) {
453453
return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths);
454454
}
455+
std::string json_path = path.GetString();
456+
// $ -> $. in json_path
457+
if (UNLIKELY(json_path.size() == 1 && json_path[0] == '$')) {
458+
json_path.insert(1, ".");
459+
}
455460
std::vector<JsonPath> parsed_paths;
456-
JsonFunctions::parse_json_paths(path.GetString(), &parsed_paths);
461+
JsonFunctions::parse_json_paths(json_path, &parsed_paths);
457462
_parsed_jsonpaths.push_back(std::move(parsed_paths));
458463
}
459464

@@ -464,7 +469,12 @@ Status NewJsonReader::_parse_jsonpath_and_json_root() {
464469

465470
// parse jsonroot
466471
if (!_json_root.empty()) {
467-
JsonFunctions::parse_json_paths(_json_root, &_parsed_json_root);
472+
std::string json_root = _json_root;
473+
// $ -> $. in json_root
474+
if (json_root.size() == 1 && json_root[0] == '$') {
475+
json_root.insert(1, ".");
476+
}
477+
JsonFunctions::parse_json_paths(json_root, &_parsed_json_root);
468478
}
469479
return Status::OK();
470480
}
@@ -1410,7 +1420,7 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
14101420
}
14111421
}
14121422
if (i < _parsed_jsonpaths.size() && JsonFunctions::is_root_path(_parsed_jsonpaths[i])) {
1413-
// Indicate that the jsonpath is "$.", read the full root json object, insert the original doc directly
1423+
// Indicate that the jsonpath is "$" or "$.", read the full root json object, insert the original doc directly
14141424
ColumnNullable* nullable_column = nullptr;
14151425
IColumn* target_column_ptr = nullptr;
14161426
if (slot_desc->is_nullable()) {
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[
2+
{"id": 1, "city": "Beijing", "code": 100},
3+
{"id": 2, "city": "Shanghai", "code": 200},
4+
{"id": 3, "city": "Guangzhou", "code": 300},
5+
{"id": 4, "city": "Shenzhen", "code": 400},
6+
{"id": 5, "city": "Hangzhou", "code": 500}
7+
]
8+
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !select_jsonroot_dollar_dot --
3+
1 Beijing 100
4+
2 Shanghai 200
5+
3 Guangzhou 300
6+
4 Shenzhen 400
7+
5 Hangzhou 500
8+
9+
-- !select_jsonroot_dollar --
10+
1 Beijing 100
11+
2 Shanghai 200
12+
3 Guangzhou 300
13+
4 Shenzhen 400
14+
5 Hangzhou 500
15+
16+
-- !select_jsonpath_dollar --
17+
1 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
18+
2 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
19+
3 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
20+
4 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
21+
5 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
22+
23+
-- !select_jsonpath_dollar_dot --
24+
1 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
25+
2 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
26+
3 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
27+
4 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
28+
5 [{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
29+
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
suite("test_json_root", "p0") {
19+
def testTable = "t"
20+
def dataFile = "simple_json.json"
21+
def s3BucketName = getS3BucketName()
22+
def s3Endpoint = getS3Endpoint()
23+
def s3Region = getS3Region()
24+
def ak = getS3AK()
25+
def sk = getS3SK()
26+
27+
sql "DROP TABLE IF EXISTS ${testTable}"
28+
29+
sql """
30+
CREATE TABLE IF NOT EXISTS ${testTable} (
31+
id INT DEFAULT '10',
32+
city VARCHAR(32) DEFAULT '',
33+
code BIGINT SUM DEFAULT '0')
34+
DISTRIBUTED BY RANDOM BUCKETS 10
35+
PROPERTIES("replication_num" = "1");
36+
"""
37+
38+
// case1: use "$." in json_root
39+
streamLoad {
40+
table testTable
41+
file dataFile
42+
time 10000
43+
set 'format', 'json'
44+
set 'strip_outer_array', 'true'
45+
set 'json_root', '$.'
46+
set 'columns', 'id,city,code'
47+
48+
check { result, exception, startTime, endTime ->
49+
if (exception != null) {
50+
throw exception
51+
}
52+
log.info("Stream load result: ${result}".toString())
53+
def json = parseJson(result)
54+
assertEquals("success", json.Status.toLowerCase())
55+
assertTrue(json.NumberLoadedRows > 0)
56+
}
57+
}
58+
59+
sql """ sync; """
60+
61+
qt_select_jsonroot_dollar_dot "SELECT * FROM ${testTable} ORDER BY id"
62+
63+
sql "TRUNCATE TABLE ${testTable}"
64+
65+
// case2: use "$" in json_root
66+
streamLoad {
67+
table testTable
68+
file dataFile
69+
time 10000
70+
set 'format', 'json'
71+
set 'strip_outer_array', 'true'
72+
set 'json_root', '$'
73+
set 'columns', 'id,city,code'
74+
75+
check { result, exception, startTime, endTime ->
76+
if (exception != null) {
77+
throw exception
78+
}
79+
log.info("Stream load result: ${result}".toString())
80+
def json = parseJson(result)
81+
assertEquals("success", json.Status.toLowerCase())
82+
assertTrue(json.NumberLoadedRows > 0)
83+
}
84+
}
85+
86+
sql """ sync; """
87+
88+
qt_select_jsonroot_dollar "SELECT * FROM ${testTable} ORDER BY id"
89+
90+
sql "DROP TABLE IF EXISTS ${testTable}"
91+
92+
testTable = "t_with_json"
93+
94+
sql """
95+
CREATE TABLE IF NOT EXISTS ${testTable}(
96+
c1 INT DEFAULT '10',
97+
c2 Json
98+
)
99+
DISTRIBUTED BY RANDOM BUCKETS 10
100+
PROPERTIES("replication_num" = "1");
101+
"""
102+
103+
// case3: use "$" in json_path
104+
105+
streamLoad {
106+
table testTable
107+
file dataFile
108+
time 10000
109+
set 'format', 'json'
110+
set 'strip_outer_array', 'true'
111+
set 'jsonpaths', '["$.id", "$"]'
112+
set 'columns', 'c1,c2'
113+
114+
check { result, exception, startTime, endTime ->
115+
if (exception != null) {
116+
throw exception
117+
}
118+
log.info("Stream load result: ${result}".toString())
119+
def json = parseJson(result)
120+
assertEquals("success", json.Status.toLowerCase())
121+
assertTrue(json.NumberLoadedRows > 0)
122+
}
123+
}
124+
125+
sql """ sync; """
126+
127+
qt_select_jsonpath_dollar "SELECT * FROM ${testTable} ORDER BY c1"
128+
129+
sql "TRUNCATE TABLE ${testTable}"
130+
131+
132+
// case4: use "$." in json_path
133+
134+
streamLoad {
135+
table testTable
136+
file dataFile
137+
time 10000
138+
set 'format', 'json'
139+
set 'strip_outer_array', 'true'
140+
set 'jsonpaths', '["$.id", "$."]'
141+
set 'columns', 'c1,c2'
142+
143+
check { result, exception, startTime, endTime ->
144+
if (exception != null) {
145+
throw exception
146+
}
147+
log.info("Stream load result: ${result}".toString())
148+
def json = parseJson(result)
149+
assertEquals("success", json.Status.toLowerCase())
150+
assertTrue(json.NumberLoadedRows > 0)
151+
}
152+
}
153+
154+
sql """ sync; """
155+
156+
qt_select_jsonpath_dollar_dot "SELECT * FROM ${testTable} ORDER BY c1"
157+
158+
sql "DROP TABLE IF EXISTS ${testTable}"
159+
}

0 commit comments

Comments
 (0)