Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ set(ICEBERG_SOURCES
util/timepoint.cc
util/truncate_util.cc
util/type_util.cc
util/url_encoder.cc
util/uuid.cc)

set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ iceberg_sources = files(
'util/timepoint.cc',
'util/truncate_util.cc',
'util/type_util.cc',
'util/url_encoder.cc',
'util/uuid.cc',
)

Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ add_iceberg_test(util_test
location_util_test.cc
string_util_test.cc
truncate_util_test.cc
url_encoder_test.cc
uuid_test.cc
visit_type_test.cc)

Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ iceberg_tests = {
'location_util_test.cc',
'string_util_test.cc',
'truncate_util_test.cc',
'url_encoder_test.cc',
'uuid_test.cc',
'visit_type_test.cc',
),
Expand Down
83 changes: 83 additions & 0 deletions src/iceberg/test/url_encoder_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/url_encoder.h"

#include <gtest/gtest.h>

#include "iceberg/test/matchers.h"

namespace iceberg {

// Verifies UrlEncoder::Encode: unreserved characters pass through untouched and
// every other byte becomes a lowercase "%xx" escape.
TEST(UrlEncoderTest, Encode) {
  // RFC 3986 unreserved characters should not be encoded
  EXPECT_THAT(UrlEncoder::Encode("abc123XYZ"), ::testing::Eq("abc123XYZ"));
  EXPECT_THAT(UrlEncoder::Encode("test-file_name.txt~backup"),
              ::testing::Eq("test-file_name.txt~backup"));

  // Spaces and special characters should be encoded
  EXPECT_THAT(UrlEncoder::Encode("hello world"), ::testing::Eq("hello%20world"));
  // The input must contain a literal '@' so that it matches the "%40" in the
  // expected value.
  EXPECT_THAT(UrlEncoder::Encode("test@example.com"),
              ::testing::Eq("test%40example.com"));
  EXPECT_THAT(UrlEncoder::Encode("path/to/file"), ::testing::Eq("path%2fto%2ffile"));
  EXPECT_THAT(UrlEncoder::Encode("key=value&foo=bar"),
              ::testing::Eq("key%3dvalue%26foo%3dbar"));
  EXPECT_THAT(UrlEncoder::Encode("100%"), ::testing::Eq("100%25"));
  EXPECT_THAT(UrlEncoder::Encode("hello\x1fworld"), ::testing::Eq("hello%1fworld"));

  // Bytes outside ASCII are escaped one byte at a time (guards against
  // sign-extension of char values >= 0x80).
  EXPECT_THAT(UrlEncoder::Encode("\xc3\xa9"), ::testing::Eq("%c3%a9"));

  EXPECT_THAT(UrlEncoder::Encode(""), ::testing::Eq(""));
}

// Verifies UrlEncoder::Decode: "%XX" escapes (either hex case) are turned back
// into bytes, '+' becomes a space, and everything else is copied through.
TEST(UrlEncoderTest, Decode) {
  // Decode percent-encoded strings
  EXPECT_THAT(UrlEncoder::Decode("hello%20world"), ::testing::Eq("hello world"));
  // "%40" decodes to a literal '@'.
  EXPECT_THAT(UrlEncoder::Decode("test%40example.com"),
              ::testing::Eq("test@example.com"));
  EXPECT_THAT(UrlEncoder::Decode("path%2fto%2Ffile"), ::testing::Eq("path/to/file"));
  EXPECT_THAT(UrlEncoder::Decode("key%3dvalue%26foo%3Dbar"),
              ::testing::Eq("key=value&foo=bar"));
  EXPECT_THAT(UrlEncoder::Decode("100%25"), ::testing::Eq("100%"));

  // ASCII Unit Separator (0x1F)
  EXPECT_THAT(UrlEncoder::Decode("hello%1Fworld"), ::testing::Eq("hello\x1Fworld"));

  // '+' follows the application/x-www-form-urlencoded convention
  EXPECT_THAT(UrlEncoder::Decode("hello+world"), ::testing::Eq("hello world"));

  // A '%' that does not start a complete hex escape is passed through as-is
  EXPECT_THAT(UrlEncoder::Decode("100%"), ::testing::Eq("100%"));
  EXPECT_THAT(UrlEncoder::Decode("100%2"), ::testing::Eq("100%2"));
  EXPECT_THAT(UrlEncoder::Decode("%zz"), ::testing::Eq("%zz"));

  // Unreserved characters remain unchanged
  EXPECT_THAT(UrlEncoder::Decode("test-file_name.txt~backup"),
              ::testing::Eq("test-file_name.txt~backup"));
  EXPECT_THAT(UrlEncoder::Decode(""), ::testing::Eq(""));
}

// Verifies that Decode(Encode(s)) == s for a mix of reserved, unreserved,
// control and empty inputs.
TEST(UrlEncoderTest, EncodeDecodeRoundTrip) {
  // Same sample inputs as the Encode/Decode tests, plus a punctuation-heavy
  // string and a string made only of unreserved characters.
  const std::vector<std::string> test_cases = {"hello world",
                                               "test@example.com",
                                               "path/to/file",
                                               "key=value&foo=bar",
                                               "100%",
                                               "hello\x1Fworld",
                                               "special!@#$%^&*()chars",
                                               "mixed-123_test.file~ok",
                                               ""};

  for (const auto& test : test_cases) {
    const std::string encoded = UrlEncoder::Encode(test);
    const std::string decoded = UrlEncoder::Decode(encoded);
    EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test;
  }
}

} // namespace iceberg
1 change: 1 addition & 0 deletions src/iceberg/util/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ install_headers(
'timepoint.h',
'truncate_util.h',
'type_util.h',
'url_encoder.h',
'uuid.h',
'visitor_generate.h',
'visit_type.h',
Expand Down
69 changes: 69 additions & 0 deletions src/iceberg/util/url_encoder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/url_encoder.h"

#include <iomanip>
#include <sstream>

namespace iceberg {

/// Percent-encodes \p str_to_encode byte by byte.
///
/// Bytes in the RFC 3986 "unreserved" set (ASCII letters, ASCII digits, '-',
/// '_', '.', '~') are copied unchanged; every other byte is written as '%'
/// followed by two lowercase hexadecimal digits.
///
/// \param str_to_encode The raw bytes to encode.
/// \return The percent-encoded string.
std::string UrlEncoder::Encode(std::string_view str_to_encode) {
  // Lowercase digits keep the output format unchanged (e.g. '/' -> "%2f").
  static constexpr char kHexDigits[] = "0123456789abcdef";

  std::string escaped;
  // Lower bound only: each escaped byte grows the output by two characters.
  escaped.reserve(str_to_encode.size());

  for (const unsigned char c : str_to_encode) {
    // Use explicit ASCII ranges instead of std::isalnum: the latter consults
    // the current C locale and may classify bytes >= 0x80 as alphanumeric,
    // which would leave them unescaped.
    const bool is_unreserved = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
                               (c >= '0' && c <= '9') || c == '-' || c == '_' ||
                               c == '.' || c == '~';
    if (is_unreserved) {
      escaped += static_cast<char>(c);
    } else {
      escaped += '%';
      escaped += kHexDigits[c >> 4];
      escaped += kHexDigits[c & 0x0F];
    }
  }
  return escaped;
}

/// Reverses percent-encoding in \p str_to_decode.
///
/// - "%XX", where both X are hexadecimal digits (either case), becomes the
///   byte with that value.
/// - '+' becomes a space (application/x-www-form-urlencoded convention).
/// - A '%' that is not followed by two hexadecimal digits is copied through
///   unchanged, and decoding continues with the next character.
///
/// \param str_to_decode The encoded string to decode.
/// \return The decoded bytes.
std::string UrlEncoder::Decode(std::string_view str_to_decode) {
  // Value of a single hexadecimal digit, or -1 if `ch` is not one. A strict
  // per-character check is used instead of std::stoi, which also accepts
  // leading whitespace, a sign, a "0x" prefix and partially valid input
  // (e.g. "2G" parses as 2), and therefore decodes malformed escapes.
  const auto hex_value = [](char ch) -> int {
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    return -1;
  };

  std::string result;
  // The decoded string is never longer than the encoded one.
  result.reserve(str_to_decode.size());

  for (size_t i = 0; i < str_to_decode.size(); ++i) {
    const char c = str_to_decode[i];
    if (c == '%' && i + 2 < str_to_decode.size()) {
      const int high = hex_value(str_to_decode[i + 1]);
      const int low = hex_value(str_to_decode[i + 2]);
      if (high >= 0 && low >= 0) {
        result += static_cast<char>((high << 4) | low);
        i += 2;  // Skip the two digits that were just consumed.
      } else {
        // Malformed escape: keep the '%' and let the following characters be
        // processed normally.
        result += c;
      }
    } else if (c == '+') {
      // In application/x-www-form-urlencoded, '+' represents a whitespace.
      result += ' ';
    } else {
      result += c;
    }
  }

  return result;
}

} // namespace iceberg
54 changes: 54 additions & 0 deletions src/iceberg/util/url_encoder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <string>
#include <string_view>

#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"

/// \file iceberg/util/url_encoder.h
/// \brief URL encoding and decoding.

namespace iceberg {

/// \brief Utilities for percent-encoding and percent-decoding strings used in URLs.
///
/// Both functions are static; the class only serves as a scope for them.
class ICEBERG_EXPORT UrlEncoder {
public:
/// \brief URL-encode a string.
///
/// \details This is a simple implementation of url-encode
/// - Unreserved characters: [A-Z], [a-z], [0-9], "-", "_", ".", "~"
///   are copied unchanged.
/// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
/// - All other characters are percent-encoded (%xx), one escape per byte,
///   using lowercase hexadecimal digits (e.g. "/" -> "%2f").
/// \param str_to_encode The string to encode.
/// \return The URL-encoded string.
static std::string Encode(std::string_view str_to_encode);

/// \brief URL-decode a string.
///
/// \details Decodes percent-encoded characters (e.g., "%20" -> space).
/// - Hexadecimal digits are accepted in either case ("%2f" and "%2F").
/// - "+" is decoded as a space (application/x-www-form-urlencoded
///   convention). Encode() never emits "+", so this only affects input
///   produced elsewhere.
/// - A "%" that has fewer than two characters after it is copied unchanged.
/// \param str_to_decode The encoded string to decode.
/// \return The decoded string.
static std::string Decode(std::string_view str_to_decode);
};

} // namespace iceberg
Loading