Skip to content

Commit

Permalink
Adding Spark 3.4 Support
Browse files — Browse the repository at this point in the history
  • Loading branch information
KeerthiYandaOS committed Aug 14, 2023
1 parent bab43c8 commit d9757e7
Show file tree
Hide file tree
Showing 15 changed files with 21 additions and 21 deletions.
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import scala.xml.transform.{RewriteRule, RuleTransformer}
import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}

val condaEnvName = "synapseml"
val sparkVersion = "3.3.1"
val sparkVersion = "3.4.1"
name := "synapseml"
ThisBuild / organization := "com.microsoft.azure"
ThisBuild / scalaVersion := "2.12.15"
ThisBuild / scalaVersion := "2.12.17"

val scalaMajorVersion = 2.12

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ object PackageUtils {

val PackageName = s"synapseml_$ScalaVersionSuffix"
val PackageMavenCoordinate = s"$PackageGroup:$PackageName:${BuildInfo.version}"
private val AvroCoordinate = "org.apache.spark:spark-avro_2.12:3.3.1"
private val AvroCoordinate = "org.apache.spark:spark-avro_2.12:3.4.1"
val PackageRepository: String = SparkMLRepository

// If testing onnx package with snapshots repo, make sure to switch to using
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ object RTestGen {
| "spark.sql.shuffle.partitions=10",
| "spark.sql.crossJoin.enabled=true")
|
|sc <- spark_connect(master = "local", version = "3.3.1", config = conf)
|sc <- spark_connect(master = "local", version = "3.4.1", config = conf)
|
|""".stripMargin, StandardOpenOption.CREATE)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ object SynapseUtilities {
| "nodeSizeFamily": "MemoryOptimized",
| "provisioningState": "Succeeded",
| "sessionLevelPackagesEnabled": "true",
| "sparkVersion": "3.3"
| "sparkVersion": "3.4"
| }
|}
|""".stripMargin
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies:
- r-devtools=2.4.2
- pip:
- pyarrow>=0.15.0
- pyspark==3.3.1
- pyspark==3.4.1
- pandas==1.2.5
- wheel
- sphinx==4.2.0
Expand Down
4 changes: 2 additions & 2 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,8 @@ jobs:
fi
sbt publishM2
SPARK_VERSION=3.3.1
HADOOP_VERSION=3.2
SPARK_VERSION=3.4.1
HADOOP_VERSION=3.3
wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
(timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR)
- task: PublishTestResults@2
Expand Down
4 changes: 2 additions & 2 deletions start
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash

export OPENMPI_VERSION="3.1.2"
export SPARK_VERSION="3.3.1"
export HADOOP_VERSION="2.7"
export SPARK_VERSION="3.4.1"
export HADOOP_VERSION="3.3"
export SYNAPSEML_VERSION="0.11.1" # Binder compatibility version

echo "Beginning Spark Session..."
Expand Down
2 changes: 1 addition & 1 deletion tools/docker/demo/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.11.1
ARG DEBIAN_FRONTEND=noninteractive

ENV SPARK_VERSION=3.3.1
ENV SPARK_VERSION=3.4.1
ENV HADOOP_VERSION=3
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
Expand Down
2 changes: 1 addition & 1 deletion tools/docker/minimal/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.11.1
ARG DEBIAN_FRONTEND=noninteractive

ENV SPARK_VERSION=3.3.1
ENV SPARK_VERSION=3.4.1
ENV HADOOP_VERSION=3
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64
Expand Down
8 changes: 4 additions & 4 deletions tools/dotnet/dotnetSetup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR"
# Install Sleet
dotnet tool install -g sleet

# Install Apache Spark-3.3
curl https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz -o spark-3.3.1-bin-hadoop3.tgz
# Install Apache Spark-3.4.1
curl https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz -o spark-3.4.1-bin-hadoop3.tgz
mkdir ~/bin
tar -xzvf spark-3.3.1-bin-hadoop3.tgz -C ~/bin
export SPARK_HOME=~/bin/spark-3.3.1-bin-hadoop3/
tar -xzvf spark-3.4.1-bin-hadoop3.tgz -C ~/bin
export SPARK_HOME=~/bin/spark-3.4.1-bin-hadoop3/
export PATH=$SPARK_HOME/bin:$PATH
echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME"
echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH"
2 changes: 1 addition & 1 deletion tools/helm/livy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ LABEL maintainer="Dalitso Banda [email protected]"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
ENV HADOOP_VERSION 3.2.1
ENV HADOOP_VERSION 3.3.4

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
Expand Down
2 changes: 1 addition & 1 deletion tools/helm/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ LABEL maintainer="Dalitso Banda [email protected]"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
ENV HADOOP_VERSION 3.2.1
ENV HADOOP_VERSION 3.3.4

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
Expand Down
2 changes: 1 addition & 1 deletion tools/helm/spark/mini.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ARG k8s_tests=kubernetes/tests

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.3
ENV HADOOP_VERSION 3.1.2
ENV HADOOP_VERSION 3.3.4
ENV HADOOP_GIT_COMMIT="release-3.2.0-RC1"

ENV SPARK_HOME=/opt/spark
Expand Down
2 changes: 1 addition & 1 deletion tools/helm/zeppelin/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ LABEL maintainer="Dalitso Banda [email protected]"

# Get Spark from US Apache mirror.
ENV APACHE_SPARK_VERSION 2.4.5
ENV HADOOP_VERSION 3.2.1
ENV HADOOP_VERSION 3.3.4

RUN echo "$LOG_TAG Getting SPARK_HOME" && \
apt-get update && \
Expand Down
2 changes: 1 addition & 1 deletion tools/tests/run_r_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ if (!require("sparklyr")) {
library("sparklyr")
}

spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.1-bin-hadoop3.2.tgz", sep = ""))
spark_install_tar(paste(getwd(), "/../../../../../../spark-3.4.1-bin-hadoop3.3.tgz", sep = ""))

options("testthat.output_file" = "../../../../r-test-results.xml")
devtools::test(reporter = JunitReporter$new())

0 comments on commit d9757e7

Please sign in to comment.