Skip to content

feat: support the to_binary with format #18525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 54 additions & 2 deletions src/query/functions/src/scalars/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ use databend_common_expression::FunctionRegistry;
use databend_common_expression::Value;

pub fn register(registry: &mut FunctionRegistry) {
registry.register_aliases("to_hex", &["hex"]);
registry.register_aliases("from_hex", &["unhex"]);
registry.register_aliases("to_hex", &["hex", "hex_encode"]);
registry.register_aliases("from_hex", &["unhex", "hex_decode_binary"]);
registry.register_aliases("try_from_hex", &["try_hex_decode_binary"]);
registry.register_aliases("to_base64", &["base64_encode"]);
registry.register_aliases("from_base64", &["base64_decode_binary"]);
registry.register_aliases("try_from_base64", &["try_base64_decode_binary"]);

registry.register_passthrough_nullable_1_arg::<BinaryType, NumberType<u64>, _, _>(
"length",
Expand Down Expand Up @@ -137,6 +141,32 @@ pub fn register(registry: &mut FunctionRegistry) {
},
);

// Two-argument overload `to_binary(val, format)`: converts the string `val` to binary
// using the encoding named by the string `format`.
// Accepted format names are "hex", "base64" and "utf-8"; the name is ASCII-lowercased
// before matching, so the comparison is case-insensitive.
registry.register_passthrough_nullable_2_arg::<StringType, StringType, BinaryType, _, _>(
"to_binary",
// No domain narrowing is attempted: the result domain is always reported as full.
|_, _, _| FunctionDomain::Full,
|val, format, ctx| {
// A single decoder is selected for the whole input, so `format` has to be a scalar.
// Otherwise an error is recorded on `ctx` and an empty binary scalar is returned.
// NOTE(review): the `0` passed to `set_error` is presumably the row index — confirm.
let Some(format) = format.as_scalar() else {
ctx.set_error(
0,
"`format` parameter must be a scalar constant, not a column or expression",
);
return Value::Scalar(Vec::new());
};
match format.to_ascii_lowercase().as_str() {
// Delegate decoding to the helpers defined elsewhere in this file.
// NOTE(review): presumably the same helpers that back `from_hex` / `from_base64` — confirm.
"hex" => eval_unhex(val, ctx),
"base64" => eval_from_base64(val, ctx),
// "utf-8": the binary value is simply the string's bytes.
"utf-8" => match val {
Value::Scalar(val) => Value::Scalar(val.as_bytes().to_vec()),
// Column input is converted through `Into` rather than row by row.
Value::Column(col) => Value::Column(col.into()),
},
// Any other format name: record an error and return an empty binary scalar.
_ => {
ctx.set_error(0, "The format option only supports hex, base64, and utf-8");
Value::Scalar(Vec::new())
}
}
},
);

registry.register_combine_nullable_1_arg::<StringType, BinaryType, _, _>(
"try_to_binary",
|_, _| FunctionDomain::Full,
Expand All @@ -149,6 +179,28 @@ pub fn register(registry: &mut FunctionRegistry) {
},
);

// Two-argument overload `try_to_binary(val, format)`: same format names as the
// two-argument `to_binary` ("hex", "base64", "utf-8", matched after ASCII-lowercasing),
// but the result type is nullable and the unsupported cases below yield NULL
// instead of recording an error on `ctx`.
registry.register_combine_nullable_2_arg::<StringType, StringType, BinaryType, _, _>(
"try_to_binary",
// No domain narrowing is attempted: the result domain is always reported as full.
|_, _, _| FunctionDomain::Full,
|val, format, ctx| {
// A non-scalar `format` produces a NULL scalar result rather than an error.
let Some(format) = format.as_scalar() else {
return Value::Scalar(None);
};
match format.to_ascii_lowercase().as_str() {
// Wrap the decoders with `error_to_null`.
// NOTE(review): presumably this turns per-row decode errors into NULLs — confirm
// against the helper's definition, which is outside this hunk.
"hex" => error_to_null(eval_unhex)(val, ctx),
"base64" => error_to_null(eval_from_base64)(val, ctx),
// "utf-8": the binary value is the string's bytes, always wrapped as non-NULL.
"utf-8" => match val {
Value::Scalar(val) => Value::Scalar(Some(val.as_bytes().to_vec())),
Value::Column(col) => {
// Every row is marked valid: the bitmap is constant `true` for `col.len()` rows.
let validity = Bitmap::new_constant(true, col.len());
Value::Column(NullableColumn::new_unchecked(col.into(), validity))
}
},
// Any other format name yields NULL.
_ => Value::Scalar(None),
}
},
);

registry.register_passthrough_nullable_1_arg::<BinaryType, StringType, _, _>(
"to_hex",
|_, _| FunctionDomain::Full,
Expand Down
10 changes: 10 additions & 0 deletions src/query/functions/tests/it/scalars/testdata/function_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ array_get -> get
array_length -> length
array_size -> length
array_slice -> slice
base64_decode_binary -> from_base64
base64_encode -> to_base64
between_dows -> between_days
between_doys -> between_days
between_epochs -> between_seconds
Expand All @@ -28,6 +30,8 @@ diff_doys -> diff_days
diff_epochs -> diff_seconds
diff_isodows -> diff_days
hex -> to_hex
hex_decode_binary -> from_hex
hex_encode -> to_hex
intdiv -> div
ipv4_num_to_string -> inet_ntoa
ipv4_string_to_num -> inet_aton
Expand Down Expand Up @@ -95,6 +99,8 @@ to_start_of_iso_week -> to_monday
to_text -> to_string
to_varchar -> to_string
trunc -> truncate
try_base64_decode_binary -> try_from_base64
try_hex_decode_binary -> try_from_hex
try_ipv4_num_to_string -> try_inet_ntoa
try_ipv4_string_to_num -> try_inet_aton
try_json_object -> try_object_construct
Expand Down Expand Up @@ -3944,6 +3950,8 @@ Functions overloads:
7 to_binary(Geography NULL) :: Binary NULL
8 to_binary(String) :: Binary
9 to_binary(String NULL) :: Binary NULL
10 to_binary(String, String) :: Binary
11 to_binary(String NULL, String NULL) :: Binary NULL
0 to_bitmap(String) :: Bitmap
1 to_bitmap(String NULL) :: Bitmap NULL
2 to_bitmap(UInt64) :: Bitmap
Expand Down Expand Up @@ -4616,6 +4624,8 @@ Functions overloads:
7 try_to_binary(Geography NULL) :: Binary NULL
8 try_to_binary(String) :: Binary NULL
9 try_to_binary(String NULL) :: Binary NULL
10 try_to_binary(String, String) :: Binary NULL
11 try_to_binary(String NULL, String NULL) :: Binary NULL
0 try_to_boolean(Variant) :: Boolean NULL
1 try_to_boolean(Variant NULL) :: Boolean NULL
2 try_to_boolean(String) :: Boolean NULL
Expand Down
34 changes: 34 additions & 0 deletions src/query/sql/src/planner/semantic/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3644,6 +3644,10 @@ impl<'a> TypeChecker<'a> {
Ascii::new("stream_has_data"),
Ascii::new("getvariable"),
Ascii::new("equal_null"),
Ascii::new("hex_decode_string"),
Ascii::new("base64_decode_string"),
Ascii::new("try_hex_decode_string"),
Ascii::new("try_base64_decode_string"),
];
FUNCTIONS
}
Expand Down Expand Up @@ -4302,6 +4306,36 @@ impl<'a> TypeChecker<'a> {
Some(self.resolve_map_access(span, expr, paths))
}
}
// Rewrite of the single-argument `*_decode_string` functions.
// A call to `hex_decode_string`, `try_hex_decode_string`, `base64_decode_string` or
// `try_base64_decode_string` with exactly one argument is resolved as a cast to STRING
// of the matching `*_decode_binary` function applied to the same argument, e.g.
// `hex_decode_string(x)` becomes `CAST(hex_decode_binary(x) AS STRING)`.
(func_name, &[expr])
if matches!(
func_name,
"hex_decode_string"
| "try_hex_decode_string"
| "base64_decode_string"
| "try_base64_decode_string"
) =>
{
Some(self.resolve(&Expr::Cast {
span,
expr: Box::new(Expr::FunctionCall {
span,
func: ASTFunctionCall {
distinct: false,
// Derive the target function name by swapping the `_string` suffix for
// `_binary`; any `try_` prefix is left untouched by the replacement.
name: Identifier::from_name(
span,
func_name.replace("_string", "_binary"),
),
// The original argument is forwarded unchanged; nothing else is set on the call.
args: vec![expr.clone()],
params: vec![],
order_by: vec![],
window: None,
lambda: None,
},
}),
target_type: TypeName::String,
pg_style: false,
}))
}
_ => None,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ statement ok
CREATE TABLE IF NOT EXISTS t1(id Int, v binary) Engine = Fuse

statement ok
INSERT INTO t1 (id, v) VALUES(1, to_binary('aaa')),(2, from_hex('616161')),(3, from_base64('YWFh'))
INSERT INTO t1 (id, v) VALUES(1, to_binary('aaa')),(2, from_hex('616161')),(3, from_base64('YWFh')),(4, to_binary('aaa', 'utf-8')),(5, to_binary('616161', 'hex')),(6, to_binary('YWFh', 'base64'))

statement ok
INSERT INTO t1 (id, v) VALUES(4, 'aaa')
INSERT INTO t1 (id, v) VALUES(7, 'aaa')

query IT
SELECT id, v FROM t1 order by id
Expand All @@ -23,6 +23,9 @@ SELECT id, v FROM t1 order by id
2 616161
3 616161
4 616161
5 616161
6 616161
7 616161

statement ok
ALTER TABLE t1 MODIFY COLUMN v string
Expand All @@ -34,6 +37,9 @@ SELECT id, v FROM t1 order by id
2 aaa
3 aaa
4 aaa
5 aaa
6 aaa
7 aaa

statement ok
ALTER TABLE t1 MODIFY COLUMN v binary
Expand All @@ -45,6 +51,9 @@ SELECT id, v FROM t1 order by id
2 616161
3 616161
4 616161
5 616161
6 616161
7 616161

statement ok
create table t2(a int, b binary NOT NULL DEFAULT 'abc', c double default 'inf', e float default 'nan' );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,12 @@ select hex(null)
----
NULL

query T
SELECT FROM_HEX(TO_HEX('abc'))::STRING
----
abc

query T
SELECT HEX_DECODE_STRING(TO_HEX('abc'))
----
abc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ SELECT FROM_BASE64(TO_BASE64('abc'))::STRING
----
abc

query T
SELECT BASE64_DECODE_STRING(TO_BASE64('abc'))
----
abc

query T
SELECT TO_BASE64(NULL)
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Error: APIError: QueryFailed: [1008]error:
--> SQL:1:8
|
1 | select base64(1)
| ^^^^^^^^^ no function matches the given name: 'base64', do you mean 'to_base64'?
| ^^^^^^^^^ no function matches the given name: 'base64', do you mean 'base64_encode', 'base64_decode_binary', 'base64_decode_string', 'to_base64'?


Error: APIError: QueryFailed: [1065]error:
Expand Down