From adc2ffa17d613710af0ebf3221ac06adc699cb6e Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Tue, 21 May 2019 11:16:38 +0200
Subject: [PATCH 1/4] [FIX] Removed moms parser spaghetti

---
 .../scala/io/parser/PropertiesParser.scala | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/main/scala/io/parser/PropertiesParser.scala b/src/main/scala/io/parser/PropertiesParser.scala
index f8e8793..d9cae47 100644
--- a/src/main/scala/io/parser/PropertiesParser.scala
+++ b/src/main/scala/io/parser/PropertiesParser.scala
@@ -10,22 +10,22 @@ object PropertiesParser {
     val lines = Source.fromFile(propertiesFileName).getLines.toList
     val split = for (line <- lines) yield line.split(":")
 
-    // TODO Moms' Spaghetti Bolognese
     var host = split.head(1) + ":" + split.head(2) + ":" + split.head(3)
-      .filterNot((x: Char) => x.equals('\"'))
-    host = host.replaceAll("\\s", "")
-    val port = split(1)(1)
-      .filterNot((x: Char) => x.equals('\"'))
-      .filterNot((x: Char) => x.isWhitespace)
-    val name = split(2)(1)
-      .filterNot((x: Char) => x.equals('\"'))
-      .filterNot((x: Char) => x.isWhitespace)
-    val password = split(3)(1)
-      .filterNot((x: Char) => x.equals('\"'))
-      .filterNot((x: Char) => x.isWhitespace)
-    val user = split(4)(1)
-      .filterNot((x: Char) => x.equals('\"'))
-      .filterNot((x: Char) => x.isWhitespace)
+    var port = split(1)(1)
+    var name = split(2)(1)
+    var password = split(3)(1)
+    var user = split(4)(1)
+    val properties = List(host, port, name, password, user)
+
+    // remove any quotation marks and whitespace
+    val formatted_properties = properties
+      .map(string => string.filterNot((x: Char) => x.equals('\"')))
+      .map(string => string.filterNot((x: Char) => x.isWhitespace))
+    host = formatted_properties.head
+    port = formatted_properties(1)
+    name = formatted_properties(2)
+    password = formatted_properties(3)
+    user = formatted_properties(4)
 
     DatabaseProperties(host, user, password, port, name)
   }

From 0e5db1ba7632129619d8b4f2ff0794cadca327c4 Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Tue, 21 May 2019 15:40:19 +0200
Subject: [PATCH 2/4] [FEATURE] Launchable script - may not yet be working as expected

---
 build.sbt                                       | 9 +++++++++
 src/main/scala/io/parser/PropertiesParser.scala | 3 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/build.sbt b/build.sbt
index d92178b..6f86ba4 100644
--- a/build.sbt
+++ b/build.sbt
@@ -22,3 +22,12 @@ assemblyMergeStrategy in assembly := {
   case PathList("META-INF", xs @ _*) => MergeStrategy.discard
   case x => MergeStrategy.first
 }
+
+// https://github.com/sbt/sbt-assembly
+// prepending a launch script
+// this is not yet working as expected!
+import sbtassembly.AssemblyPlugin.defaultUniversalScript
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(prependShellScript = Some(defaultUniversalScript(shebang = false)))
+
+assemblyJarName in assembly := s"${name.value}-${version.value}"
diff --git a/src/main/scala/io/parser/PropertiesParser.scala b/src/main/scala/io/parser/PropertiesParser.scala
index d9cae47..5329068 100644
--- a/src/main/scala/io/parser/PropertiesParser.scala
+++ b/src/main/scala/io/parser/PropertiesParser.scala
@@ -30,4 +30,5 @@ object PropertiesParser {
 
     DatabaseProperties(host, user, password, port, name)
   }
-}
\ No newline at end of file
+}
+

From 4075f439e6d6068e0749442e030bbe97e144822a Mon Sep 17 00:00:00 2001
From: Lukas Heumos
Date: Tue, 9 Jul 2019 13:16:37 +0200
Subject: [PATCH 3/4] [FEATURE] Renamed to scark, finalized 1.0.0 release

---
 README.md                 | 12 +++++++-----
 build.sbt                 | 13 ++-----------
 src/main/scala/Main.scala | 39 ++++++++++++++++++++-------------------
 3 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index 020e9e8..299aa29 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 [![Build Status](https://travis-ci.com/qbicsoftware/spark-benchmark-cli.svg?branch=development)](https://travis-ci.com/qbicsoftware/spark-benchmark-cli)
+[![Scala Steward badge](https://img.shields.io/badge/Scala_Steward-helping-brightgreen.svg?style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAMAAAARSr4IAAAAVFBMVEUAAACHjojlOy5NWlrKzcYRKjGFjIbp293YycuLa3pYY2LSqql4f3pCUFTgSjNodYRmcXUsPD/NTTbjRS+2jomhgnzNc223cGvZS0HaSD0XLjbaSjElhIr+AAAAAXRSTlMAQObYZgAAAHlJREFUCNdNyosOwyAIhWHAQS1Vt7a77/3fcxxdmv0xwmckutAR1nkm4ggbyEcg/wWmlGLDAA3oL50xi6fk5ffZ3E2E3QfZDCcCN2YtbEWZt+Drc6u6rlqv7Uk0LdKqqr5rk2UCRXOk0vmQKGfc94nOJyQjouF9H/wCc9gECEYfONoAAAAASUVORK5CYII=)](https://scala-steward.org)
 # spark-benchmark-cli
 A tool for submitting SQL queries to a Spark Cluster. Various benchmarking statistics will be calculated.
 
@@ -9,11 +10,11 @@ SBT assembly plugin is configured. Hence, in the project root:
 ```bash
 sbt assembly
 ```
-will build the fat jar. The result will be written to /target/$scala-version/$name-assembly-$version.jar
+will build the fat jar. The result will be written to ```/target/$scala-version/$name-assembly-$version.jar```
 
 ## Running
 ```bash
-java -jar spark-benchmark-cli-assembly-$version.jar
+java -jar scark-cli-1.0.0.jar
 ```
 
 ## Usage
@@ -34,7 +35,8 @@ SQL Queries inside Spark!
 Required parameters are:
 ```
 -c, --config=
--q, --query
+-t, --table[=<table>]
+-q, --query[=<query>]
 ```
 Queries are optionally interactive.
 You can either use ```-q``` to get a prompt for your query or supply a full query when running the tool: ```-q[=<query>]```.
@@ -43,15 +45,15 @@
 
 ## Submitting a query
 A query can be submitted to spark via:
 ```bash
 /spark/bin/spark-submit --master spark://spark-master:7077 \
-/opt/spark-apps/spark-benchmark-cli-assembly-0.1.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t <table> -q <"query">
-q <"query"> +/opt/spark-apps/scark-cli-1.0.0.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t
-q <"query"> ``` ## Example Query ```bash /spark/bin/spark-submit --master spark://spark-master:7077 \ -/opt/spark-apps/spark-benchmark-cli-assembly-0.1.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t Consequence -q "SELECT id FROM Consequence" +/opt/spark-apps/scark-cli-1.0.0.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t Consequence -q "SELECT id FROM Consequence" ``` ## Tests diff --git a/build.sbt b/build.sbt index 6f86ba4..bcdba56 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ -name := "spark-benchmark-cli" +name := "scark-cli" -version := "0.1" +version := "1.0.0" scalaVersion := "2.12.8" @@ -22,12 +22,3 @@ assemblyMergeStrategy in assembly := { case PathList("META-INF", xs @ _*) => MergeStrategy.discard case x => MergeStrategy.first } - -// https://github.com/sbt/sbt-assembly -// prepending a launch script -// this is not yet working as expected! -import sbtassembly.AssemblyPlugin.defaultUniversalScript - -assemblyOption in assembly := (assemblyOption in assembly).value.copy(prependShellScript = Some(defaultUniversalScript(shebang = false))) - -assemblyJarName in assembly := s"${name.value}-${version.value}" diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index ece718c..2e06e1a 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -9,9 +9,17 @@ import com.typesafe.scalalogging.Logger object Main { - val LOG = Logger("Spark Benchmark CLI") + val LOG = Logger("scark-cli") def main(args: Array[String]) { + LOG.info( + """ + | _________ __ .__ .__ + | / _____/ ____ _____ _______| | __ ____ | | |__| + | \_____ \_/ ___\\__ \\_ __ \ |/ / ______ _/ ___\| | | | + | / \ \___ / __ \| | \/ < /_____/ \ \___| |_| | + |/_______ /\___ >____ /__| |__|_ \ \___ >____/__| + | \/ \/ \/ \/ \/ """.stripMargin) // parse commandline parameters, get database properties val commandLineParser = new CommandLineParser() val commandLineParameters = commandLineParser.parseCommandLineParameters(args) @@ -32,16 +40,17 @@ object Main { val spark = if (commandLineParameters.localMode) { LOG.info("Running Spark in local mode!") + LOG.warn("scark-cli assumes that your mariadb-java-client jar is in /opt/spark-apps") SparkSession .builder() .appName("Spark Benchmark CLI") .config("spark.master", "local") - .config("spark.driver.extraClassPath", "/opt/spark-apps/spark-apps/mariadb-java-client-2.4.1.jar") + .config("spark.driver.extraClassPath", "/opt/spark-apps/mariadb-java-client-2.4.1.jar") .getOrCreate() } else { SparkSession .builder() - .appName("Spark Benchmark CLI") + .appName("scark-cli") // .config("spark.some.config.option", "some-value") .getOrCreate() } @@ -50,14 +59,11 @@ object Main { Class.forName("org.mariadb.jdbc.Driver") - // test database connection + // connect to the database try { val connection = DriverManager.getConnection(databaseProperties.jdbcURL, databaseProperties.user, databaseProperties.password) - if (!connection.isClosed) { - LOG.error("Connection to the database did not close automatically when performing a connection test!") - } } catch { - case e: Exception => LOG.error("Something went wrong when attempting to connect to the database. 
%s", e.getMessage) + case e: Exception => LOG.error(s"Unable to connect to the database ${e.getMessage}") } // Spark likes working with properties, hence we create a properties object @@ -72,7 +78,8 @@ object Main { // NOTE // Spark requires a View of a table to allow for SQL queries - // CreateOrReplaceTempView will create a temporary view of the table in memory. It is not persistent at this moment but you can run sql query on top of that. + // CreateOrReplaceTempView will create a temporary view of the table in memory. + // It is not persistent at this moment but you can run sql queries on top of that. // If you want to save it you can either persist or use saveAsTable to save. table.createOrReplaceTempView(commandLineParameters.table) @@ -83,9 +90,7 @@ object Main { LOG.info("Spark support has been disabled!") // connect - // Class.forName("org.mariadb.jdbc.Driver") val connection = DriverManager.getConnection(databaseProperties.jdbcURL, databaseProperties.user, databaseProperties.password) - connection.isClosed // execute query val statement = connection.createStatement() @@ -95,19 +100,15 @@ object Main { val list = Iterator.from(0).takeWhile(_ => rs.next()).map(_ => rs.getString(1)).toList println(list) - // while (rs.next()) { - // println(rs.getString(1)) //or rs.getString("column name"); - // } - connection.isClosed } } catch { - case eie: ExceptionInInitializerError => { + case eie: ExceptionInInitializerError => LOG.error("__________________________________________________________________________________________") - LOG.error("Hadoop support is possibly wrongly configured or missing! " + eie.getException.getMessage) + LOG.error(s"Hadoop support is possibly wrongly configured or missing! ${eie.getException.getMessage}") LOG.error("__________________________________________________________________________________________") - } - case e: Exception => LOG.error("Hadoop support is possibly wrongly configured or missing! " + e.getMessage) + case e: Exception => LOG.error(s"Hadoop support is possibly wrongly configured or missing! ${e.getMessage}") + LOG.error("Don't forget to specify the name of the database driver!") } } From f27f499a38c9f066be4c9f6fbeea931ec8d13418 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Tue, 9 Jul 2019 13:21:08 +0200 Subject: [PATCH 4/4] [FIX] moved from oraclejdk8 to openjdk8 in travis --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 12fa179..5d6529c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: scala -jdk: oraclejdk8 +jdk: + - openjdk8 scala: - 2.12.8