Commit bab43c8

chore: bump to spark 3.3.1
JessicaXYWang authored and mhamilton723 committed Jul 10, 2023
1 parent e487c69 commit bab43c8
Showing 15 changed files with 29 additions and 29 deletions.
Empty file.
2 changes: 1 addition & 1 deletion build.sbt
@@ -8,7 +8,7 @@ import scala.xml.transform.{RewriteRule, RuleTransformer}
import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}

val condaEnvName = "synapseml"
-val sparkVersion = "3.2.3"
+val sparkVersion = "3.3.1"
name := "synapseml"
ThisBuild / organization := "com.microsoft.azure"
ThisBuild / scalaVersion := "2.12.15"
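The single sparkVersion value above is presumably what the Spark artifacts elsewhere in build.sbt are resolved against, so the one-line bump moves every Spark dependency at once; a rough sketch of that wiring (illustrative, not the file's exact contents):

    val sparkVersion = "3.3.1"
    libraryDependencies ++= Seq(
      "org.apache.spark" %% "spark-core"  % sparkVersion % "provided",
      "org.apache.spark" %% "spark-sql"   % sparkVersion % "provided",
      "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided"
    )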
@@ -43,6 +43,7 @@ abstract class RegressionBase {
* when running on Spark 3.0.* and 3.1.*.
* Workaround: use reflection to construct the implementation.
*/
+//TODO: Check for spark 3.3.0
implicit lazy val sumImpl: sum.Impl[BroadcastedColumns[BDM[Double], BDV[Double]], Transpose[BDV[Double]]] = {
Try {
// This works for breeze 1.2
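The snippet is truncated above, but the comment spells out the pattern: try the code path that compiles against the current breeze, and fall back to building the implementation reflectively when that implicit is missing. A minimal sketch of such a reflective fallback, using a hypothetical helper rather than the repository's exact breeze internals:

    import scala.util.Try

    // A Scala `object` compiles to a class named <fqcn>$ whose singleton lives in the
    // static MODULE$ field; this helper (hypothetical, for illustration) loads it by name.
    def loadScalaObject(fqcn: String): AnyRef =
      Class.forName(fqcn + "$").getField("MODULE$").get(null)

    // Prefer the directly compiled implementation, otherwise fall back to reflection.
    def resolveImpl[T](direct: => T, fallbackFqcn: String): T =
      Try(direct).getOrElse(loadScalaObject(fallbackFqcn).asInstanceOf[T])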
@@ -101,7 +101,7 @@ object RTestGen {
| "spark.sql.shuffle.partitions=10",
| "spark.sql.crossJoin.enabled=true")
|
-|sc <- spark_connect(master = "local", version = "3.2.4", config = conf)
+|sc <- spark_connect(master = "local", version = "3.3.1", config = conf)
|
|""".stripMargin, StandardOpenOption.CREATE)

@@ -11,11 +11,7 @@ import java.io.File
import scala.collection.mutable.ListBuffer

class DatabricksGPUTests extends DatabricksTestHelper {
-val horovodInstallationScript: File = FileUtilities.join(
-BuildInfo.baseDirectory.getParent, "deep-learning",
-"src", "main", "python", "horovod_installation.sh").getCanonicalFile
-uploadFileToDBFS(horovodInstallationScript, "/FileStore/horovod-fix-commit/horovod_installation.sh")
-val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, GPUInitScripts)
+val clusterId: String = createClusterInPool(GPUClusterName, AdbGpuRuntime, 2, GpuPoolId, "[]")
val jobIdsToCancel: ListBuffer[Int] = databricksTestHelper(
clusterId, GPULibraries, GPUNotebooks)

@@ -29,10 +29,11 @@ object DatabricksUtilities {

// ADB Info
val Region = "eastus"
-val PoolName = "synapseml-build-10.4"
-val GpuPoolName = "synapseml-build-10.4-gpu"
-val AdbRuntime = "10.4.x-scala2.12"
-val AdbGpuRuntime = "10.4.x-gpu-ml-scala2.12"
+val PoolName = "synapseml-build-11.2"
+val GpuPoolName = "synapseml-build-11.2-gpu"
+val AdbRuntime = "11.2.x-scala2.12"
+// https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/11.2
+val AdbGpuRuntime = "11.2.x-gpu-ml-scala2.12"
val NumWorkers = 5
val AutoTerminationMinutes = 15

@@ -72,6 +73,8 @@ object DatabricksUtilities {
// TODO: install synapse.ml.dl wheel package here
val GPULibraries: String = List(
Map("maven" -> Map("coordinates" -> PackageMavenCoordinate, "repo" -> PackageRepository)),
+Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")),
+Map("pypi" -> Map("package" -> "torchvision==0.12.0")),
Map("pypi" -> Map("package" -> "transformers==4.15.0")),
Map("pypi" -> Map("package" -> "petastorm==0.12.0"))
).toJson.compactPrint
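For reference, the list-of-maps above is serialized with spray-json into the Databricks libraries payload; a short sketch of the shape that compactPrint produces, assuming the usual DefaultJsonProtocol import and shortened to two entries:

    import spray.json._
    import DefaultJsonProtocol._

    val gpuLibraries: String = List(
      Map("pypi" -> Map("package" -> "pytorch-lightning==1.5.0")),
      Map("pypi" -> Map("package" -> "petastorm==0.12.0"))
    ).toJson.compactPrint
    // [{"pypi":{"package":"pytorch-lightning==1.5.0"}},{"pypi":{"package":"petastorm==0.12.0"}}]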
@@ -255,7 +255,7 @@ object SynapseUtilities {
| "nodeSizeFamily": "MemoryOptimized",
| "provisioningState": "Succeeded",
| "sessionLevelPackagesEnabled": "true",
-| "sparkVersion": "3.2"
+| "sparkVersion": "3.3"
| }
|}
|""".stripMargin
4 changes: 2 additions & 2 deletions environment.yml
@@ -11,8 +11,7 @@ dependencies:
- r-devtools=2.4.2
- pip:
- pyarrow>=0.15.0
-  - numpy>=1.19.3
-  - pyspark==3.2.3
+  - pyspark==3.3.1
- pandas==1.2.5
- wheel
- sphinx==4.2.0
@@ -32,6 +31,7 @@ dependencies:
- twine
- jupyter
- mlflow
+- numpy==1.23.0
- torch==1.11.0
- torchvision==0.12.0
- horovod==0.25.0
@@ -141,7 +141,7 @@
"featurizer = VectorAssembler(inputCols=feature_cols, outputCol=\"features\")\n",
"lr_train_data = featurizer.transform(train_data)[\"target\", \"features\"]\n",
"lr_test_data = featurizer.transform(test_data)[\"target\", \"features\"]\n",
-"display(lr_train_data.limit(10).toPandas())"
+"display(lr_train_data.limit(10))"
]
},
{
@@ -156,7 +156,7 @@
"lr_model = lr.fit(lr_train_data)\n",
"lr_predictions = lr_model.transform(lr_test_data)\n",
"\n",
-"display(lr_predictions.limit(10).toPandas())"
+"display(lr_predictions.limit(10))"
]
},
{
@@ -210,7 +210,7 @@
"\n",
"vw_train_data = vw_featurizer.transform(train_data)[\"target\", \"features\"]\n",
"vw_test_data = vw_featurizer.transform(test_data)[\"target\", \"features\"]\n",
-"display(vw_train_data.limit(10).toPandas())"
+"display(vw_train_data.limit(10))"
]
},
{
@@ -236,7 +236,7 @@
"vw_model = vwr.fit(vw_train_data_2.repartition(1))\n",
"vw_predictions = vw_model.transform(vw_test_data)\n",
"\n",
-"display(vw_predictions.limit(10).toPandas())"
+"display(vw_predictions.limit(10))"
]
},
{
2 changes: 1 addition & 1 deletion pipeline.yaml
@@ -513,7 +513,7 @@ jobs:
fi
sbt publishM2
-SPARK_VERSION=3.2.4
+SPARK_VERSION=3.3.1
HADOOP_VERSION=3.2
wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
(timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR) || (echo "retrying" && timeout 20m sbt "project $(PACKAGE)" coverage testR)
2 changes: 1 addition & 1 deletion start
@@ -1,7 +1,7 @@
#!/bin/bash

export OPENMPI_VERSION="3.1.2"
-export SPARK_VERSION="3.2.3"
+export SPARK_VERSION="3.3.1"
export HADOOP_VERSION="2.7"
export SYNAPSEML_VERSION="0.11.1" # Binder compatibility version

4 changes: 2 additions & 2 deletions tools/docker/demo/Dockerfile
@@ -3,8 +3,8 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.11.1
ARG DEBIAN_FRONTEND=noninteractive

-ENV SPARK_VERSION=3.2.3
-ENV HADOOP_VERSION=2.7
+ENV SPARK_VERSION=3.3.1
+ENV HADOOP_VERSION=3
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64

4 changes: 2 additions & 2 deletions tools/docker/minimal/Dockerfile
@@ -3,8 +3,8 @@ FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
ARG SYNAPSEML_VERSION=0.11.1
ARG DEBIAN_FRONTEND=noninteractive

-ENV SPARK_VERSION=3.2.3
-ENV HADOOP_VERSION=2.7
+ENV SPARK_VERSION=3.3.1
+ENV HADOOP_VERSION=3
ENV SYNAPSEML_VERSION=${SYNAPSEML_VERSION}
ENV JAVA_HOME /usr/lib/jvm/java-1.11.0-openjdk-amd64

8 changes: 4 additions & 4 deletions tools/dotnet/dotnetSetup.sh
@@ -20,11 +20,11 @@ echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR"
# Install Sleet
dotnet tool install -g sleet

-# Install Apache Spark-3.2
-curl https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz -o spark-3.2.0-bin-hadoop3.2.tgz
+# Install Apache Spark-3.3
+curl https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz -o spark-3.3.1-bin-hadoop3.tgz
mkdir ~/bin
-tar -xzvf spark-3.2.0-bin-hadoop3.2.tgz -C ~/bin
-export SPARK_HOME=~/bin/spark-3.2.0-bin-hadoop3.2/
+tar -xzvf spark-3.3.1-bin-hadoop3.tgz -C ~/bin
+export SPARK_HOME=~/bin/spark-3.3.1-bin-hadoop3/
export PATH=$SPARK_HOME/bin:$PATH
echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME"
echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH"
2 changes: 1 addition & 1 deletion tools/tests/run_r_tests.R
@@ -3,7 +3,7 @@ if (!require("sparklyr")) {
library("sparklyr")
}

-spark_install_tar(paste(getwd(), "/../../../../../../spark-3.2.4-bin-hadoop3.2.tgz", sep = ""))
+spark_install_tar(paste(getwd(), "/../../../../../../spark-3.3.1-bin-hadoop3.2.tgz", sep = ""))

options("testthat.output_file" = "../../../../r-test-results.xml")
devtools::test(reporter = JunitReporter$new())
