Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions spark-connect-client/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,10 @@
"java-base": "17",
"python": "3.11",
},
{
"product": "4.0.0",
"spark-k8s": "4.0.0",
"java-base": "17",
"python": "3.11",
},
]
79 changes: 58 additions & 21 deletions spark-k8s/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,17 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}

RUN <<EOF

# IMPORTANT: HBase connectors don't support Spark 4 yet, so we skip the build.
# Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
if [[ "${PRODUCT}" == 4* ]]; then
# Create this empty directory so that following COPY layers succeed.
mkdir -p /stackable/spark/jars
# Create a dummy tarball to satisfy the build process for Spark 3.
touch hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz
exit 0
fi

cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"

NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
Expand Down Expand Up @@ -110,6 +121,7 @@ mvn \
--define hadoop-three.version="${HADOOP_VERSION}" \
--define hbase.version="${HBASE}" \
--define skipTests \
--define maven.test.skip=true \
clean package

mkdir -p /stackable/spark/jars
Expand Down Expand Up @@ -162,9 +174,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs

# >>> Build spark
# Compiling the tests takes a lot of time, so we skip them
# -Dmaven.test.skip=true skips both the compilation and execution of tests
# -DskipTests skips only the execution
RUN <<EOF
# Make Maven aware of custom Stackable libraries
mv /stackable/patched-libs/maven /root/.m2/repository
Expand All @@ -179,15 +188,35 @@ RUN <<EOF
ORIGINAL_VERSION="${PRODUCT}"
NEW_VERSION="${PRODUCT}-stackable${RELEASE}"

STACKABLE_HADOOP_VERSION="${HADOOP_HADOOP}-stackable${RELEASE}"

MAVEN_BIN="/tmp/apache-maven-${MAVEN_VERSION}/bin/mvn"
export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"

./dev/make-distribution.sh \
--mvn /tmp/apache-maven-${MAVEN_VERSION}/bin/mvn \
-Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
-DskipTests \
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
--no-transfer-progress \
--batch-mode
case "${PRODUCT}" in
4*)
# The Spark 4 script has a --connect option which is not available in Spark 3.
# This option is required to build Spark Connect.
# Also this option breaks the Spark 3 build so we ensure it's only provided here.
./dev/make-distribution.sh \
--mvn "${MAVEN_BIN}" \
--connect \
-Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
-DskipTests \
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
--no-transfer-progress \
--batch-mode
;;
*)
./dev/make-distribution.sh \
--mvn "${MAVEN_BIN}" \
-Dhadoop.version="${STACKABLE_HADOOP_VERSION}" \
-DskipTests \
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver \
--no-transfer-progress \
--batch-mode
;;
esac

sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
EOF
Expand All @@ -198,22 +227,30 @@ EOF
# we create a new dist/connect folder, and copy them here.
RUN <<EOF

# Collect the Spark Connect jars in dist/connect and create the version-only
# symlinks to the server jar.

# Read the Scala binary version from the first "scala.binary.version" line in
# pom.xml. The value is the text between the first '>' and the following '<'.
# A plain text lookup is enough here and avoids starting Maven just to
# evaluate a single property.
SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')

mkdir -p dist/connect
cd dist/connect

# The Connect modules live under sql/connect in Spark 4 and under
# connector/connect in earlier versions. Only the module directory differs,
# so it is selected once and the copy commands below are shared.
case "${PRODUCT}" in
    4*)
        CONNECT_MODULE_DIR="sql/connect"
        ;;
    *)
        CONNECT_MODULE_DIR="connector/connect"
        ;;
esac

# Root of the patched Spark source tree that holds the built jars.
SPARK_BUILD_DIR="/stackable/spark-${PRODUCT}-stackable${RELEASE}"
# Version part shared by the file names of all three Connect jars.
CONNECT_JAR_VERSION="${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}"

cp "${SPARK_BUILD_DIR}/${CONNECT_MODULE_DIR}/server/target/spark-connect_${CONNECT_JAR_VERSION}.jar" .
cp "${SPARK_BUILD_DIR}/${CONNECT_MODULE_DIR}/common/target/spark-connect-common_${CONNECT_JAR_VERSION}.jar" .
cp "${SPARK_BUILD_DIR}/${CONNECT_MODULE_DIR}/client/jvm/target/spark-connect-client-jvm_${CONNECT_JAR_VERSION}.jar" .

# The Spark operator expects a file named
# spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar, without the
# -stackable${RELEASE} suffix. This link is kept for backwards compatibility.
# TODO: remove it at some time in the future.
ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
# Link to the spark-connect jar without the stackable suffix and scala version.
# This link supersedes the previous link.
ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect-${PRODUCT}.jar"
EOF

# <<< Build spark
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
From 2da5608928018dd017c91b904eb8f84a4f6df78a Mon Sep 17 00:00:00 2001
From: Razvan-Daniel Mihai <[email protected]>
Date: Fri, 4 Jul 2025 15:54:55 +0200
Subject: Update CycloneDX plugin

---
dev/make-distribution.sh | 1 -
pom.xml | 5 +++++
2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 16607e45ae..44e345a245 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -176,7 +176,6 @@ BUILD_COMMAND=("$MVN" clean package \
-Dmaven.javadoc.skip=true \
-Dmaven.scaladoc.skip=true \
-Dmaven.source.skip \
- -Dcyclonedx.skip=true \
$@)

# Actually build the jar
diff --git a/pom.xml b/pom.xml
index 443d46a430..632920f100 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3327,6 +3327,11 @@
<groupId>org.cyclonedx</groupId>
<artifactId>cyclonedx-maven-plugin</artifactId>
<version>2.8.0</version>
+ <configuration>
+ <projectType>application</projectType>
+ <schemaVersion>1.5</schemaVersion>
+ <skipNotDeployed>false</skipNotDeployed>
+ </configuration>
<executions>
<execution>
<phase>package</phase>
2 changes: 2 additions & 0 deletions spark-k8s/stackable/patches/4.0.0/patchable.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
base = "fa33ea000a0bda9e5a3fa1af98e8e85b8cc5e4d4"
mirror = "https://github.com/stackabletech/spark.git"
18 changes: 18 additions & 0 deletions spark-k8s/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,22 @@
"tini": "0.19.0",
"hbase_connector": "1.0.1",
},
{
"product": "4.0.0",
"java-base": "17",
"java-devel": "17",
"python": "3.11",
"hadoop/hadoop": "3.4.1",
"hbase": "2.6.2",
"aws_java_sdk_bundle": "2.24.6",
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
"woodstox_core": "6.5.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
"vector": "0.47.0",
"jmx_exporter": "1.3.0",
"tini": "0.19.0",
"hbase_connector": "1.0.1",
},
]