diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 36c3a483d..9954afdb7 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -94,6 +94,7 @@ set(ICEBERG_SOURCES util/timepoint.cc util/truncate_util.cc util/type_util.cc + util/url_encoder.cc util/uuid.cc) set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS) diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 3929e1803..cc0991da6 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -116,6 +116,7 @@ iceberg_sources = files( 'util/timepoint.cc', 'util/truncate_util.cc', 'util/type_util.cc', + 'util/url_encoder.cc', 'util/uuid.cc', ) diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 30a473fd2..71bf90182 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -108,6 +108,7 @@ add_iceberg_test(util_test location_util_test.cc string_util_test.cc truncate_util_test.cc + url_encoder_test.cc uuid_test.cc visit_type_test.cc) diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 378182819..50422ccc9 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -88,6 +88,7 @@ iceberg_tests = { 'location_util_test.cc', 'string_util_test.cc', 'truncate_util_test.cc', + 'url_encoder_test.cc', 'uuid_test.cc', 'visit_type_test.cc', ), diff --git a/src/iceberg/test/url_encoder_test.cc b/src/iceberg/test/url_encoder_test.cc new file mode 100644 index 000000000..52b4e2826 --- /dev/null +++ b/src/iceberg/test/url_encoder_test.cc @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/url_encoder.h" + +#include + +#include "iceberg/test/matchers.h" + +namespace iceberg { + +TEST(UrlEncoderTest, Encode) { + // RFC 3986 unreserved characters should not be encoded + EXPECT_THAT(UrlEncoder::Encode("abc123XYZ"), ::testing::Eq("abc123XYZ")); + EXPECT_THAT(UrlEncoder::Encode("test-file_name.txt~backup"), + ::testing::Eq("test-file_name.txt~backup")); + + // Spaces and special characters should be encoded + EXPECT_THAT(UrlEncoder::Encode("hello world"), ::testing::Eq("hello%20world")); + EXPECT_THAT(UrlEncoder::Encode("test@example.com"), + ::testing::Eq("test%40example.com")); + EXPECT_THAT(UrlEncoder::Encode("path/to/file"), ::testing::Eq("path%2fto%2ffile")); + EXPECT_THAT(UrlEncoder::Encode("key=value&foo=bar"), + ::testing::Eq("key%3dvalue%26foo%3dbar")); + EXPECT_THAT(UrlEncoder::Encode("100%"), ::testing::Eq("100%25")); + EXPECT_THAT(UrlEncoder::Encode("hello\x1fworld"), ::testing::Eq("hello%1fworld")); + EXPECT_THAT(UrlEncoder::Encode(""), ::testing::Eq("")); +} + +TEST(UrlEncoderTest, Decode) { + // Decode percent-encoded strings + EXPECT_THAT(UrlEncoder::Decode("hello%20world"), ::testing::Eq("hello world")); + EXPECT_THAT(UrlEncoder::Decode("test%40example.com"), + ::testing::Eq("test@example.com")); + EXPECT_THAT(UrlEncoder::Decode("path%2fto%2Ffile"), ::testing::Eq("path/to/file")); + EXPECT_THAT(UrlEncoder::Decode("key%3dvalue%26foo%3Dbar"), + ::testing::Eq("key=value&foo=bar")); + EXPECT_THAT(UrlEncoder::Decode("100%25"), ::testing::Eq("100%")); + + // ASCII Unit Separator (0x1F) + EXPECT_THAT(UrlEncoder::Decode("hello%1Fworld"), ::testing::Eq("hello\x1Fworld")); + + // Unreserved characters remain unchanged + EXPECT_THAT(UrlEncoder::Decode("test-file_name.txt~backup"), + ::testing::Eq("test-file_name.txt~backup")); + EXPECT_THAT(UrlEncoder::Decode(""), ::testing::Eq("")); +} + +TEST(UrlEncoderTest, EncodeDecodeRoundTrip) { + std::vector test_cases = {"hello world", + "test@example.com", + "path/to/file", + "key=value&foo=bar", + "100%", + "hello\x1Fworld", + "special!@#$%^&*()chars", + "mixed-123_test.file~ok", + ""}; + + for (const auto& test : test_cases) { + std::string encoded = UrlEncoder::Encode(test); + std::string decoded = UrlEncoder::Decode(encoded); + EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test; + } +} + +} // namespace iceberg diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build index 880f63401..b3866b705 100644 --- a/src/iceberg/util/meson.build +++ b/src/iceberg/util/meson.build @@ -38,6 +38,7 @@ install_headers( 'timepoint.h', 'truncate_util.h', 'type_util.h', + 'url_encoder.h', 'uuid.h', 'visitor_generate.h', 'visit_type.h', diff --git a/src/iceberg/util/url_encoder.cc b/src/iceberg/util/url_encoder.cc new file mode 100644 index 000000000..32c5f4ee8 --- /dev/null +++ b/src/iceberg/util/url_encoder.cc @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/url_encoder.h" + +#include +#include + +namespace iceberg { + +std::string UrlEncoder::Encode(std::string_view str_to_encode) { + std::stringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (unsigned char c : str_to_encode) { + // reserve letters, numbers and -._~ + if (std::isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') { + escaped << c; + } else { + escaped << '%' << std::setw(2) << static_cast(c) << std::setfill('0'); + } + } + return escaped.str(); +} + +std::string UrlEncoder::Decode(std::string_view str_to_decode) { + std::string result; + result.reserve(str_to_decode.size()); + + for (size_t i = 0; i < str_to_decode.size(); ++i) { + char c = str_to_decode[i]; + if (c == '%' && i + 2 < str_to_decode.size()) { + std::string hex(str_to_decode.substr(i + 1, 2)); + try { + char decoded = static_cast(std::stoi(hex, nullptr, 16)); + result += decoded; + i += 2; + } catch (...) { + result += c; + } + } else if (c == '+') { + // In application/x-www-form-urlencoded, '+' represents a whitespace. + result += ' '; + } else { + result += c; + } + } + + return result; +} + +} // namespace iceberg diff --git a/src/iceberg/util/url_encoder.h b/src/iceberg/util/url_encoder.h new file mode 100644 index 000000000..50a14c504 --- /dev/null +++ b/src/iceberg/util/url_encoder.h @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +/// \file iceberg/util/url_encoder.h +/// \brief URL encoding and decoding. + +namespace iceberg { + +/// \brief Utilities for encoding and decoding URLs. +class ICEBERG_EXPORT UrlEncoder { + public: + /// \brief URL-encode a string. + /// + /// \details This is a simple implementation of url-encode + /// - Unreserved characters: [A-Z], [a-z], [0-9], "-", "_", ".", "~" + /// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+"). + /// - All other characters are percent-encoded (%XX). + /// \param str_to_encode The string to encode. + /// \return The URL-encoded string. + static std::string Encode(std::string_view str_to_encode); + + /// \brief URL-decode a string. + /// + /// \details Decodes percent-encoded characters (e.g., "%20" -> space). + /// \param str_to_decode The encoded string to decode. + /// \return The decoded string. + static std::string Decode(std::string_view str_to_decode); +}; + +} // namespace iceberg