diff --git a/README.md b/README.md
index 299aa29..0af2837 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,9 @@
-[![Build Status](https://travis-ci.com/qbicsoftware/spark-benchmark-cli.svg?branch=development)](https://travis-ci.com/qbicsoftware/spark-benchmark-cli)
+[![Build Status](https://travis-ci.com/qbicsoftware/scark-cli.svg?branch=development)](https://travis-ci.com/qbicsoftware/scark-cli)
+![GitHub release](https://img.shields.io/github/release/qbicsoftware/scark-cli.svg)
+![GitHub commits since latest release](https://img.shields.io/github/commits-since/qbicsoftware/scark-cli/latest.svg)
[![Scala Steward badge](https://img.shields.io/badge/Scala_Steward-helping-brightgreen.svg?style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAMAAAARSr4IAAAAVFBMVEUAAACHjojlOy5NWlrKzcYRKjGFjIbp293YycuLa3pYY2LSqql4f3pCUFTgSjNodYRmcXUsPD/NTTbjRS+2jomhgnzNc223cGvZS0HaSD0XLjbaSjElhIr+AAAAAXRSTlMAQObYZgAAAHlJREFUCNdNyosOwyAIhWHAQS1Vt7a77/3fcxxdmv0xwmckutAR1nkm4ggbyEcg/wWmlGLDAA3oL50xi6fk5ffZ3E2E3QfZDCcCN2YtbEWZt+Drc6u6rlqv7Uk0LdKqqr5rk2UCRXOk0vmQKGfc94nOJyQjouF9H/wCc9gECEYfONoAAAAASUVORK5CYII=)](https://scala-steward.org)
+
# spark-benchmark-cli
A tool for submitting SQL queries to a Spark Cluster. Various benchmarking statistics will be calculated.
Currently MariaDB is supported out of the box.
@@ -14,7 +17,7 @@ will build the fat jar. The result will be written to ```/target/$scala-version/
## Running
```bash
-java -jar scark-cli-1.0.0.jar
+java -jar scark-cli-1.1.0.jar
```
## Usage
@@ -25,7 +28,7 @@ Benchmark Tool for evaluating the performance of a Spark Cluster. Run custom
SQL Queries inside Spark!
-s, --spark run with spark support
-l, --local run spark in local mode - requires -s option to be in effect
- -t, --table[=
] table to execute SQL query in, mandatory if running with spark support
+ -t, --table[=] list of tables to execute SQL query in, mandatory if running with spark support
-d, --driver[=] driver to access Database, e.g. org.mariadb.jdbc.Driver, mandatory if running with spark support
-q, --query[=] SQL query to execute
-c, --config[=]
@@ -45,13 +48,24 @@ You can either use ```-q``` to get a prompt for your query or supply a full quer
A query can be submitted to spark via:
```bash
/spark/bin/spark-submit --master spark://spark-master:7077 \
-/opt/spark-apps/scark-cli-1.0.0.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t -q <"query">
+/opt/spark-apps/scark-cli-1.1.0.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t <table> -q <"query">
```
## Example Query
```bash
/spark/bin/spark-submit --master spark://spark-master:7077 \
-/opt/spark-apps/scark-cli-1.0.0.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t Consequence -q "SELECT id FROM Consequence"
+/opt/spark-apps/scark-cli-1.1.0.jar -s -d org.mariadb.jdbc.Driver -c /opt/spark-data/database_properties.txt -t Consequence -q "SELECT id FROM Consequence"
+```
+
+## Complex Query
+```
+/spark/bin/spark-submit --master spark://spark-master:7077 \
+/opt/spark-apps/scark-cli-1.1.0.jar \
+-c /opt/spark-data/database_properties.txt \
+-s \
+-t Consequence Variant Variant_has_Consequence \
+-q "select * from Variant INNER JOIN Variant_has_Consequence ON Variant.id = Variant_has_Consequence.Variant_id INNER JOIN Consequence on Variant_has_Consequence.Consequence_id = Consequence.id" \
+-d org.mariadb.jdbc.Driver
```
## Tests
@@ -60,6 +74,6 @@ Run tests inside the sbt console from the root project directory using:
test
```
-## Known issues
+## Known Issues
Due to a bug in the MariaDB connector and Spark, mariadb in the jdbc URL has to be replaced with mysql.
Please refer to: https://github.com/qbicsoftware/spark-benchmark-cli/issues/9 .
diff --git a/build.sbt b/build.sbt
index bcdba56..a5ad514 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,6 +1,6 @@
name := "scark-cli"
-version := "1.0.0"
+version := "1.1.0"
scalaVersion := "2.12.8"
diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala
index 2e06e1a..aa48c9a 100644
--- a/src/main/scala/Main.scala
+++ b/src/main/scala/Main.scala
@@ -72,16 +72,22 @@ object Main {
connectionProperties.put("password", s"${databaseProperties.password}")
connectionProperties.put("driver", s"${commandLineParameters.databaseDriver}")
- val table = spark.read.jdbc(databaseProperties.jdbcURL, commandLineParameters.table, connectionProperties)
- table.printSchema()
- table.show()
+ val tables = commandLineParameters.table
+
+ val dfs = for {
+ table <- tables
+ } yield (table, spark.read.jdbc(databaseProperties.jdbcURL, table, connectionProperties))
// NOTE
// Spark requires a View of a table to allow for SQL queries
// CreateOrReplaceTempView will create a temporary view of the table in memory.
// It is not persistent at this moment but you can run sql queries on top of that.
// If you want to save it you can either persist or use saveAsTable to save.
- table.createOrReplaceTempView(commandLineParameters.table)
+ for {
+ (name, df) <- dfs
+ } df.createOrReplaceTempView(name)
+
+ for (tuple <- dfs) tuple._2.printSchema()
val result = spark.sql(commandLineParameters.sqlQuery)
result.show()
diff --git a/src/main/scala/io/cli/CommandLineOptions.scala b/src/main/scala/io/cli/CommandLineOptions.scala
index e3523b2..4ec5f01 100644
--- a/src/main/scala/io/cli/CommandLineOptions.scala
+++ b/src/main/scala/io/cli/CommandLineOptions.scala
@@ -24,8 +24,9 @@ class CommandLineOptions {
var configFilePath = ""
@Option(names = Array("-t", "--table"),
- description = Array("Table to run query on. Required if using Spark."))
- var table = ""
+ description = Array("Table to run query on. Required if using Spark."),
+ arity = "1..*")
+ var table = Array[String]()
@Option(names = Array("-q", "--query"),
description = Array("SQL query to execute."),