Skip to content

Commit 5bb3759

Browse files
committed
feat: a simple impl of url encoder
1 parent a89924d commit 5bb3759

File tree

8 files changed

+211
-0
lines changed

8 files changed

+211
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ set(ICEBERG_SOURCES
9494
util/timepoint.cc
9595
util/truncate_util.cc
9696
util/type_util.cc
97+
util/url_encoder.cc
9798
util/uuid.cc)
9899

99100
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ iceberg_sources = files(
116116
'util/timepoint.cc',
117117
'util/truncate_util.cc',
118118
'util/type_util.cc',
119+
'util/url_encoder.cc',
119120
'util/uuid.cc',
120121
)
121122

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ add_iceberg_test(util_test
108108
location_util_test.cc
109109
string_util_test.cc
110110
truncate_util_test.cc
111+
url_encoder_test.cc
111112
uuid_test.cc
112113
visit_type_test.cc)
113114

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ iceberg_tests = {
8888
'location_util_test.cc',
8989
'string_util_test.cc',
9090
'truncate_util_test.cc',
91+
'url_encoder_test.cc',
9192
'uuid_test.cc',
9293
'visit_type_test.cc',
9394
),
src/iceberg/test/url_encoder_test.cc

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/url_encoder.h"
21+
22+
#include <gtest/gtest.h>
23+
24+
#include "iceberg/test/matchers.h"
25+
26+
namespace iceberg {
27+
28+
// Verifies UrlEncoder::Encode: unreserved characters are copied verbatim and every
// other byte becomes a "%xx" escape with lowercase hex digits.
TEST(UrlEncoderTest, Encode) {
  // RFC 3986 unreserved characters should not be encoded
  EXPECT_EQ(UrlEncoder::Encode("abc123XYZ"), "abc123XYZ");
  EXPECT_EQ(UrlEncoder::Encode("test-file_name.txt~backup"),
            "test-file_name.txt~backup");

  // Spaces and special characters should be encoded
  EXPECT_EQ(UrlEncoder::Encode("hello world"), "hello%20world");
  EXPECT_EQ(UrlEncoder::Encode("test@example.com"), "test%40example.com");
  EXPECT_EQ(UrlEncoder::Encode("path/to/file"), "path%2fto%2ffile");
  EXPECT_EQ(UrlEncoder::Encode("key=value&foo=bar"), "key%3dvalue%26foo%3dbar");
  EXPECT_EQ(UrlEncoder::Encode("100%"), "100%25");

  // '+' must be escaped, because Decode() interprets a raw '+' as a space
  EXPECT_EQ(UrlEncoder::Encode("a+b"), "a%2bb");

  // ASCII Unit Separator (0x1F): control bytes are zero-padded to two hex digits
  EXPECT_EQ(UrlEncoder::Encode("hello\x1fworld"), "hello%1fworld");

  // Empty input yields empty output
  EXPECT_EQ(UrlEncoder::Encode(""), "");
}
45+
46+
// Verifies UrlEncoder::Decode: "%XX" escapes (either hex case) become the byte they
// name, '+' becomes a space, and anything that is not a complete escape is kept.
TEST(UrlEncoderTest, Decode) {
  // Decode percent-encoded strings
  EXPECT_EQ(UrlEncoder::Decode("hello%20world"), "hello world");
  EXPECT_EQ(UrlEncoder::Decode("test%40example.com"), "test@example.com");
  EXPECT_EQ(UrlEncoder::Decode("path%2fto%2Ffile"), "path/to/file");
  EXPECT_EQ(UrlEncoder::Decode("key%3dvalue%26foo%3Dbar"), "key=value&foo=bar");
  EXPECT_EQ(UrlEncoder::Decode("100%25"), "100%");

  // ASCII Unit Separator (0x1F)
  EXPECT_EQ(UrlEncoder::Decode("hello%1Fworld"), "hello\x1Fworld");

  // A raw '+' is the application/x-www-form-urlencoded spelling of a space
  EXPECT_EQ(UrlEncoder::Decode("a+b"), "a b");

  // A '%' that does not start a complete, valid escape is copied through literally
  EXPECT_EQ(UrlEncoder::Decode("100%"), "100%");
  EXPECT_EQ(UrlEncoder::Decode("%2"), "%2");
  EXPECT_EQ(UrlEncoder::Decode("%zz"), "%zz");

  // Unreserved characters remain unchanged
  EXPECT_EQ(UrlEncoder::Decode("test-file_name.txt~backup"),
            "test-file_name.txt~backup");
  EXPECT_EQ(UrlEncoder::Decode(""), "");
}
64+
65+
// Verifies that Decode(Encode(s)) == s for a mix of plain, reserved, control and
// empty inputs.
TEST(UrlEncoderTest, EncodeDecodeRoundTrip) {
  const std::vector<std::string> test_cases = {"hello world",
                                               "test@example.com",
                                               "path/to/file",
                                               "key=value&foo=bar",
                                               "100%",
                                               "hello\x1Fworld",
                                               "special!@#$%^&*()chars",
                                               "mixed-123_test.file~ok",
                                               ""};

  for (const auto& test : test_cases) {
    const std::string encoded = UrlEncoder::Encode(test);
    const std::string decoded = UrlEncoder::Decode(encoded);
    // Include the offending input so a failure identifies the case directly.
    EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test;
  }
}
82+
83+
} // namespace iceberg

src/iceberg/util/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ install_headers(
3838
'timepoint.h',
3939
'truncate_util.h',
4040
'type_util.h',
41+
'url_encoder.h',
4142
'uuid.h',
4243
'visitor_generate.h',
4344
'visit_type.h',

src/iceberg/util/url_encoder.cc

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/url_encoder.h"
21+
22+
#include <iomanip>
23+
#include <sstream>
24+
25+
namespace iceberg {
26+
27+
// Percent-encodes `str_to_encode` byte by byte.
//
// ASCII letters, ASCII digits and '-', '_', '.', '~' are copied unchanged; every
// other byte (including space and all bytes >= 0x80) is written as '%' followed by
// two lowercase hexadecimal digits.
std::string UrlEncoder::Encode(std::string_view str_to_encode) {
  // Lowercase digits keep the output identical to the former std::hex formatting.
  static constexpr char kHexDigits[] = "0123456789abcdef";

  // Explicit ASCII ranges instead of std::isalnum: the latter consults the global C
  // locale, so under a non-"C" locale bytes >= 0x80 could be classified as
  // alphanumeric and leak into the output unescaped.
  const auto is_unreserved = [](unsigned char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') ||
           c == '-' || c == '_' || c == '.' || c == '~';
  };

  std::string escaped;
  // Lower bound only; each escaped byte grows the result by two extra characters.
  escaped.reserve(str_to_encode.size());

  // Iterate as unsigned char so the shift/mask below never see a negative value.
  for (const unsigned char c : str_to_encode) {
    if (is_unreserved(c)) {
      escaped.push_back(static_cast<char>(c));
    } else {
      escaped.push_back('%');
      escaped.push_back(kHexDigits[c >> 4]);
      escaped.push_back(kHexDigits[c & 0x0F]);
    }
  }
  return escaped;
}
42+
43+
// Decodes a percent-encoded string.
//
// - "%XX", where both X are hexadecimal digits (either case), becomes the byte 0xXX.
// - '+' becomes a space (application/x-www-form-urlencoded convention).
// - A '%' that is not followed by two hexadecimal digits is copied through as a
//   literal '%', and the characters after it are then processed normally.
// - Every other character is copied unchanged.
std::string UrlEncoder::Decode(std::string_view str_to_decode) {
  // Value of a single hexadecimal digit, or -1 if `c` is not one. Validating each
  // digit by hand (instead of std::stoi) rejects inputs such as "2G", "+1", " 1" or
  // "0x", which std::stoi would partially parse and thereby corrupt the output.
  const auto hex_value = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };

  std::string result;
  // The decoded string is never longer than the encoded one.
  result.reserve(str_to_decode.size());

  for (size_t i = 0; i < str_to_decode.size(); ++i) {
    const char c = str_to_decode[i];
    if (c == '%' && i + 2 < str_to_decode.size()) {
      const int high = hex_value(str_to_decode[i + 1]);
      const int low = hex_value(str_to_decode[i + 2]);
      if (high >= 0 && low >= 0) {
        result.push_back(static_cast<char>((high << 4) | low));
        i += 2;  // consume the two hex digits as well
      } else {
        // Malformed escape: keep the '%' and let the loop handle what follows.
        result.push_back(c);
      }
    } else if (c == '+') {
      // In application/x-www-form-urlencoded, '+' represents a whitespace.
      result.push_back(' ');
    } else {
      result.push_back(c);
    }
  }

  return result;
}
68+
69+
} // namespace iceberg

src/iceberg/util/url_encoder.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <string>
23+
#include <string_view>
24+
25+
#include "iceberg/iceberg_export.h"
26+
#include "iceberg/result.h"
27+
28+
/// \file iceberg/util/url_encoder.h
29+
/// \brief URL encoding and decoding.
30+
31+
namespace iceberg {
32+
33+
/// \brief Utilities for encoding and decoding URLs.
34+
class ICEBERG_EXPORT UrlEncoder {
35+
public:
36+
/// \brief URL-encode a string.
37+
///
38+
/// \details This is a simple, byte-oriented implementation of percent-encoding:
39+
/// - Unreserved characters are kept as-is: [A-Z], [a-z], [0-9], "-", "_", ".", "~"
40+
/// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
41+
/// - All other bytes are percent-encoded as "%xx" using lowercase hex digits
/// (e.g. "/" -> "%2f").
42+
/// \param str_to_encode The string to encode.
43+
/// \return The URL-encoded string; empty if the input is empty.
44+
static std::string Encode(std::string_view str_to_encode);
45+
46+
/// \brief URL-decode a string.
47+
///
48+
/// \details Decodes percent-encoded characters (e.g., "%20" -> space).
/// - Hex digits are accepted in either case ("%2f" and "%2F" both decode to "/").
/// - A "+" is decoded as a space (application/x-www-form-urlencoded convention).
/// - A "%" that is not followed by at least two more characters is copied through
/// unchanged.
49+
/// \param str_to_decode The encoded string to decode.
50+
/// \return The decoded string; empty if the input is empty.
51+
static std::string Decode(std::string_view str_to_decode);
52+
};
53+
54+
} // namespace iceberg

0 commit comments

Comments
 (0)