diff --git a/BUILDING.txt b/BUILDING.txt
index f87cc8d11ead2..6eb32e50b8c64 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -388,6 +388,51 @@ Create a local staging version of the website (in /tmp/hadoop-site)
Note that the site needs to be built in a second pass after other artifacts.
+----------------------------------------------------------------------------------
+Including Cloud Connector Dependencies in Distributions:
+
+Hadoop distributions include the hadoop modules to work with data and services
+on cloud infrastructure.
+
+However, dependencies are omitted for all cloud connectors except hadoop-azure
+(abfs:// and wasb://) and possibly hadoop-gcp (gs://) and hadoop-tos (tos://).
+For the latter two modules, inclusion depends on whether shading is enabled.
+
+For hadoop-aws the AWS SDK bundle.jar is omitted, but everything else is included.
+
+* Keeps the binary release size below the limit for Apache distributions.
+* Reduces download and size overhead in docker usage.
+* Reduces the CVE attack surface.
+* Reduces the risk of classpath conflict.
+
+To produce a build with the specific desired dependencies, the build must be executed
+with the relevant profile of ${module}-package.
+
+For example, to produce a build with the hadoop-aws and hadoop-azure-datalake
+dependencies, run:
+
+ mvn package -Pdist -DskipTests -Dhadoop-aws-package -Dhadoop-azure-datalake-package
+
+Available package profiles:
+ hadoop-aliyun-package
+ hadoop-aws-package
+ hadoop-azure-datalake-package
+ hadoop-cos-package
+ hadoop-gcp-package
+ hadoop-huaweicloud-package
+ hadoop-tos-package
+
+To build a complete distribution with all cloud dependencies included:
+
+mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true \
+ -Phadoop-aliyun-package,hadoop-aws-package,hadoop-azure-datalake-package\
+ -Phadoop-cos-package,hadoop-gcp-package,hadoop-tos-package
+
+The resulting tar file will be too large to be distributable through ASF infrastructure.
+
+The hadoop-gcp and hadoop-tos artifacts include their dependencies unless the distribution
+is built with -DskipShade.
+
----------------------------------------------------------------------------------
Installing Hadoop
diff --git a/LICENSE-binary b/LICENSE-binary
index c9d53de77981a..6227d22424c80 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -203,7 +203,10 @@
--------------------------------------------------------------------------------
This project bundles some components that are also licensed under the Apache
-License Version 2.0:
+License Version 2.0.
+Note: some of the listed artifacts may not be included in a given build of the binary
+distribution; this depends on the build options. The list is intended
+to cover every artifact which may be included:
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js files)
@@ -211,10 +214,12 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/data
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java
ch.qos.reload4j:reload4j:1.2.22
+com.aliyun:aliyun-java-core:0.2.11-beta
com.aliyun:aliyun-java-sdk-core:4.5.10
com.aliyun:aliyun-java-sdk-kms:2.11.0
com.aliyun:aliyun-java-sdk-ram:3.1.0
com.aliyun:aliyun-java-sdk-sts:3.0.0
+com.aliyun:java-trace-api:0.2.11-beta
com.aliyun.oss:aliyun-sdk-oss:3.13.2
com.cedarsoftware:java-util:1.9.0
com.cedarsoftware:json-io:2.5.1
@@ -268,8 +273,13 @@ com.google.http-client:google-http-client-jackson2:1.46.3
com.google.http-client:google-http-client:1.46.3
com.google.j2objc:j2objc-annotations:3.0.0
com.google.oauth-client:google-oauth-client:1.37.0
-com.microsoft.azure:azure-storage:7.0.0
+com.huaweicloud:esdk-obs-java:3.20.4.2
+com.jamesmurty.utils:java-xmlbuilder:1.2
+com.microsoft.azure:azure-storage:7.0.1
com.nimbusds:nimbus-jose-jwt:10.4
+com.squareup.okhttp3:okhttp:3.14.2
+com.squareup.okio:okio:1.17.2
+com.volcengine:ve-tos-java-sdk-hadoop:2.8.9
com.zaxxer:HikariCP:4.0.3
commons-beanutils:commons-beanutils:1.9.4
commons-cli:commons-cli:1.9.0
@@ -346,6 +356,9 @@ io.opentelemetry:opentelemetry-sdk-logs:1.47.0
io.opentelemetry:opentelemetry-sdk-metrics:1.47.0
io.opentelemetry:opentelemetry-sdk-trace:1.47.0
io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha
+io.opentracing:opentracing-api:0.33.0
+io.opentracing:opentracing-noop:0.33.0
+io.opentracing:opentracing-util:0.33.0
io.reactivex:rxjava:1.3.8
io.reactivex:rxjava-string:1.1.1
io.reactivex:rxnetty:0.4.20
@@ -373,6 +386,8 @@ org.apache.htrace:htrace-core:3.1.0-incubating
org.apache.htrace:htrace-core4:4.1.0-incubating
org.apache.httpcomponents:httpclient:4.5.13
org.apache.httpcomponents:httpcore:4.4.13
+org.apache.httpcomponents.client5:httpclient5:5.5
+org.apache.httpcomponents.core5:httpcore5:5.3.6
org.apache.kafka:kafka-clients:3.9.0
org.apache.kerby:kerb-admin:2.0.3
org.apache.kerby:kerb-client:2.0.3
@@ -496,6 +511,7 @@ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanage
bootstrap v3.3.6
broccoli-asset-rev v2.4.2
broccoli-funnel v1.0.1
+cos_api-bundle-5.6.19.jar
datatables v1.11.5
em-helpers v0.5.13
em-table v0.1.6
@@ -541,7 +557,7 @@ com.microsoft.azure:azure-cosmosdb:2.4.5
com.microsoft.azure:azure-cosmosdb-commons:2.4.5
com.microsoft.azure:azure-cosmosdb-direct:2.4.5
com.microsoft.azure:azure-cosmosdb-gateway:2.4.5
-com.microsoft.azure:azure-data-lake-store-sdk:2.3.3
+com.microsoft.azure:azure-data-lake-store-sdk:2.3.9
com.microsoft.azure:azure-keyvault-core:1.0.0
com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7
org.bouncycastle:bcpkix-jdk18on:1.78.1
@@ -552,6 +568,7 @@ org.codehaus.mojo:animal-sniffer-annotations:1.24
org.jruby.jcodings:jcodings:1.0.13
org.jruby.joni:joni:2.1.2
org.ojalgo:ojalgo:43.0
+org.reactivestreams:reactive-streams:1.0.3
org.slf4j:jul-to-slf4j:1.7.36
org.slf4j:slf4j-api:1.7.36
org.slf4j:slf4j-reload4j:1.7.36
@@ -622,3 +639,8 @@ Public Domain
-------------
aopalliance:aopalliance:1.0
+
+Dom4J license
+-------------
+
+org.dom4j:dom4j:2.1.4
\ No newline at end of file
diff --git a/dev-support/bin/dist-layout-stitching b/dev-support/bin/dist-layout-stitching
index d4bfd8aaada3b..e19b2e4f1d621 100755
--- a/dev-support/bin/dist-layout-stitching
+++ b/dev-support/bin/dist-layout-stitching
@@ -130,6 +130,10 @@ run cp -p "${ROOT}/README.txt" .
run copy "${ROOT}/hadoop-common-project/hadoop-common/target/hadoop-common-${VERSION}" .
run copy "${ROOT}/hadoop-common-project/hadoop-nfs/target/hadoop-nfs-${VERSION}" .
run copy "${ROOT}/hadoop-common-project/hadoop-registry/target/hadoop-registry-${VERSION}" .
+
+# cloud connectors go into common
+run copy "${ROOT}/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/target/hadoop-cloud-storage-dist-${VERSION}" .
+
run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}" .
run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" .
run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" .
diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml
new file mode 100644
index 0000000000000..3482edd3c8bc7
--- /dev/null
+++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml
@@ -0,0 +1,59 @@
+
+
+ hadoop-cloud-storage
+
+ dir
+
+ false
+
+
+
+
+ ../../hadoop-tools/hadoop-aws/src/main/bin
+ /bin
+ 0755
+
+
+ ./../hadoop-tools/hadoop-aws/src/main/shellprofile.d
+
+ *
+
+ /libexec/shellprofile.d
+ 0755
+
+
+
+
+
+ /share/hadoop/common/lib
+ false
+ runtime
+ false
+
+
+ org.apache.hadoop:hadoop-annotations
+ org.apache.hadoop.thirdparty:hadoop-shaded-guava
+
+
+
+
diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml
index 871694209393f..7895f4e57142f 100644
--- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml
+++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml
@@ -57,6 +57,7 @@
**/file:/**
**/SecurityAuth.audit*
patchprocess/**
+ **/auth-keys.xml
diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml
new file mode 100644
index 0000000000000..74a282e4b887f
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml
@@ -0,0 +1,332 @@
+
+
+
+ 4.0.0
+
+ org.apache.hadoop
+ hadoop-project
+ 3.5.0-SNAPSHOT
+ ../../hadoop-project
+
+ hadoop-cloud-storage-dist
+ 3.5.0-SNAPSHOT
+ jar
+
+ Apache Hadoop Cloud Storage Distribution
+ Apache Hadoop Cloud Storage Distribution
+
+
+
+ cloud-storage
+
+
+
+
+ org.apache.hadoop
+ hadoop-annotations
+ provided
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ org.apache.hadoop
+ hadoop-aws
+ compile
+
+
+ software.amazon.awssdk
+ *
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-azure
+ compile
+
+
+ org.apache.hadoop
+ hadoop-azure-datalake
+ compile
+
+
+ *
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-cos
+ compile
+
+
+ *
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-huaweicloud
+ compile
+
+
+ *
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-tos
+ compile
+
+
+ *
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-gcp
+ compile
+
+
+ *
+ *
+
+
+
+
+
+
+
+
+ maven-deploy-plugin
+
+ true
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+
+
+
+
+
+
+
+
+ dist
+
+ false
+
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+ org.apache.hadoop
+ hadoop-assemblies
+ ${project.version}
+
+
+
+
+ dist
+ prepare-package
+
+ single
+
+
+ false
+ false
+ ${project.artifactId}-${project.version}
+
+ hadoop-cloud-storage
+
+
+
+
+
+
+
+
+
+
+
+ hadoop-aliyun-package
+
+ hadoop-aliyun-package
+
+
+
+ org.apache.hadoop
+ hadoop-aliyun
+ compile
+
+
+
+
+
+
+ hadoop-aws-package
+
+ hadoop-aws-package
+
+
+
+ org.apache.hadoop
+ hadoop-aws
+ compile
+
+
+
+
+
+
+ hadoop-azure-datalake-package
+
+ hadoop-azure-datalake-package
+
+
+
+ org.apache.hadoop
+ hadoop-azure-datalake
+ compile
+
+
+
+
+
+
+ hadoop-cos-package
+
+ hadoop-cos-package
+
+
+
+ org.apache.hadoop
+ hadoop-cos
+ compile
+
+
+
+
+
+
+ hadoop-huaweicloud-package
+
+ hadoop-huaweicloud-package
+
+
+
+ org.apache.hadoop
+ hadoop-huaweicloud
+ compile
+
+
+ com.fasterxml.jackson
+ *
+
+
+
+
+
+
+
+
+ hadoop-gcp-package
+
+ hadoop-gcp-package
+
+
+
+ org.apache.hadoop
+ hadoop-gcp
+ compile
+
+
+
+
+
+
+ hadoop-tos-package
+
+ hadoop-tos-package
+
+
+
+ org.apache.hadoop
+ hadoop-tos
+ compile
+
+
+
+
+
+
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml
index 641ad3afb1b0e..d00dc1f22c704 100755
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml
@@ -169,6 +169,10 @@
log4j-api
org.apache.logging.log4j
+
+ com.fasterxml.jackson.core
+ *
+
diff --git a/hadoop-cloud-storage-project/hadoop-tos/pom.xml b/hadoop-cloud-storage-project/hadoop-tos/pom.xml
index 4bbaf74e0f892..b5bd38bdb58e3 100644
--- a/hadoop-cloud-storage-project/hadoop-tos/pom.xml
+++ b/hadoop-cloud-storage-project/hadoop-tos/pom.xml
@@ -48,11 +48,27 @@
hadoop-mapreduce-client-core
provided
+
+ org.apache.httpcomponents.client5
+ httpclient5
+
+
+ org.apache.httpcomponents.core5
+ httpcore5
+
com.volcengine
ve-tos-java-sdk-hadoop
${ve-tos-java-sdk.version}
+
+ org.apache.httpcomponents.client5
+ httpclient5
+
+
+ org.apache.httpcomponents.core5
+ httpcore5
+
org.slf4j
slf4j-api
@@ -152,32 +168,6 @@
-
- org.apache.maven.plugins
- maven-shade-plugin
-
-
- package
-
- shade
-
-
- true
-
-
- *:*
-
- META-INF/*.SF
- META-INF/*.DSA
- META-INF/*.RSA
-
-
-
-
-
-
-
-
com.github.spotbugs
spotbugs-maven-plugin
@@ -190,4 +180,45 @@
+
+
+
+
+ shade
+
+ !skipShade
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+ package
+
+ shade
+
+
+ true
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+ mozilla/public-suffix-list.txt
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-cloud-storage-project/pom.xml b/hadoop-cloud-storage-project/pom.xml
index 036dfa3a699a9..93da918fb9bc4 100644
--- a/hadoop-cloud-storage-project/pom.xml
+++ b/hadoop-cloud-storage-project/pom.xml
@@ -34,6 +34,7 @@
hadoop-cos
hadoop-huaweicloud
hadoop-tos
+ hadoop-cloud-storage-dist
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index f7aa379dac087..6974193ef084f 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -77,6 +77,8 @@
4.5.13
4.4.13
+ 5.5
+ 5.3.6
1.7.36
@@ -857,6 +859,28 @@
httpcore
${httpcore.version}
+
+ org.apache.httpcomponents.client5
+ httpclient5
+ ${httpclient5.version}
+
+
+ org.slf4j
+ *
+
+
+
+
+ org.apache.httpcomponents.core5
+ httpcore5
+ ${httpcore5.version}
+
+
+ org.apache.logging.log4j
+ *
+
+
+
commons-codec
commons-codec
@@ -2157,9 +2181,9 @@
2.4.4
- com.google.cloud
- google-cloud-storage
- 2.52.0
+ com.google.cloud
+ google-cloud-storage
+ 2.52.0
diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml
index 9fa02a0ac0027..803caf8fa0648 100644
--- a/hadoop-tools/hadoop-tools-dist/pom.xml
+++ b/hadoop-tools/hadoop-tools-dist/pom.xml
@@ -92,30 +92,12 @@
pom
${project.version}
-
- org.apache.hadoop
- hadoop-aws
- compile
- ${project.version}
-
org.apache.hadoop
hadoop-kafka
compile
${project.version}
-
- org.apache.hadoop
- hadoop-azure
- compile
- ${project.version}
-
-
- org.apache.hadoop
- hadoop-aliyun
- compile
- ${project.version}
-
org.apache.hadoop
hadoop-sls
@@ -127,34 +109,12 @@
${project.version}
compile
-
- org.apache.hadoop
- hadoop-azure-datalake
- compile
- ${project.version}
-
org.apache.hadoop
hadoop-fs2img
compile
${project.version}
-
- org.apache.hadoop
- hadoop-gcp
- compile
- ${project.version}
-
-
-
- *
- *
-
-
-
diff --git a/licenses-binary/LICENSE-dom4j.txt b/licenses-binary/LICENSE-dom4j.txt
new file mode 100644
index 0000000000000..1a02acb149474
--- /dev/null
+++ b/licenses-binary/LICENSE-dom4j.txt
@@ -0,0 +1,39 @@
+Copyright 2001-2023 © MetaStuff, Ltd. and DOM4J contributors. All Rights Reserved.
+
+Redistribution and use of this software and associated documentation
+("Software"), with or without modification, are permitted provided
+that the following conditions are met:
+
+1. Redistributions of source code must retain copyright
+ statements and notices. Redistributions must also contain a
+ copy of this document.
+
+2. Redistributions in binary form must reproduce the
+ above copyright notice, this list of conditions and the
+ following disclaimer in the documentation and/or other
+ materials provided with the distribution.
+
+3. The name "DOM4J" must not be used to endorse or promote
+ products derived from this Software without prior written
+ permission of MetaStuff, Ltd. For written permission,
+ please contact dom4j-info@metastuff.com.
+
+4. Products derived from this Software may not be called "DOM4J"
+ nor may "DOM4J" appear in their names without prior written
+ permission of MetaStuff, Ltd. DOM4J is a registered
+ trademark of MetaStuff, Ltd.
+
+5. Due credit should be given to the DOM4J Project - https://dom4j.github.io/
+
+THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
+NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.