From 73c7f483cae23c59a751ce87f833ced067ff3a71 Mon Sep 17 00:00:00 2001
From: Christian Herrera
Date: Thu, 4 Jul 2024 13:36:54 +0200
Subject: [PATCH 1/4] Add essential Spark dependencies

---
 src/main/g8/default.properties         | 3 +++
 src/main/g8/project/Dependencies.scala | 8 +++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/main/g8/default.properties b/src/main/g8/default.properties
index ea4cc27..44ea901 100644
--- a/src/main/g8/default.properties
+++ b/src/main/g8/default.properties
@@ -11,5 +11,8 @@ scalafmt_version = maven(org.scalameta, scalafmt-core_2.13, stable)
 scalatest_version = maven(org.scalatest, scalatest_2.13, stable)
 scalamock_version = maven(org.scalamock, scalamock_2.13, stable)
 nscala-time_version = maven(com.github.nscala-time, nscala-time_2.13, stable)
+spark_version = maven(org.apache.spark, spark-core_2.13, stable)
+spark_sql_version = maven(org.apache.spark, spark-sql_2.13, stable)
+spark_streaming_version = maven(org.apache.spark, spark-streaming_2.13, stable)
 pprint_version = maven(com.lihaoyi, pprint_2.13, stable)
 verbatim = install-hooks.sh pre-push
diff --git a/src/main/g8/project/Dependencies.scala b/src/main/g8/project/Dependencies.scala
index 7f6b922..239ff62 100644
--- a/src/main/g8/project/Dependencies.scala
+++ b/src/main/g8/project/Dependencies.scala
@@ -2,10 +2,12 @@ import sbt._
 
 object Dependencies {
   private val prod = Seq(
-    "com.github.nscala-time" %% "nscala-time" % "$nscala-time_version$",
-    "com.lihaoyi" %% "pprint" % "$pprint_version$"
+    "com.github.nscala-time" %% "nscala-time"     % "$nscala-time_version$",
+    "com.lihaoyi"            %% "pprint"          % "$pprint_version$",
+    "org.apache.spark"       %% "spark-core"      % "$spark_version$" % Provided,
+    "org.apache.spark"       %% "spark-sql"       % "$spark_version$" % Provided,
+    "org.apache.spark"       %% "spark-streaming" % "$spark_version$" % Provided
   )
-
   private val test = Seq(
    "org.scalatest" %% "scalatest" % "$scalatest_version$",
    "org.scalamock" %% "scalamock" % "$scalamock_version$"
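
Note on the Provided scope above: the Spark artifacts stay on the compile and
test classpaths, but they are excluded from the runtime classpath, so a plain
`sbt run` of the generated project would fail with NoClassDefFoundError. A
minimal sketch of the usual workaround, assuming a standard build.sbt (this
snippet is illustrative and not part of the patch series):

// build.sbt (sketch): let `sbt run` see Provided dependencies by running
// the main class against the full compile classpath in a forked JVM.
Compile / run := Defaults
  .runTask(Compile / fullClasspath, Compile / run / mainClass, Compile / run / runner)
  .evaluated
Compile / run / fork := true // optional: keep Spark's JVM flags apart from sbt's
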
From 88f7d7a7945909c81dc585f59b8d2633733a0a7d Mon Sep 17 00:00:00 2001
From: Christian Herrera
Date: Thu, 4 Jul 2024 13:37:44 +0200
Subject: [PATCH 2/4] Modify the test to use Spark

---
 .../test/$package$/$name__Camel$Test.scala   |  9 +-
 .../src/test/$package$/SparkTestHelper.scala | 83 +++++++++++++++++++
 2 files changed, 88 insertions(+), 4 deletions(-)
 create mode 100644 src/main/g8/src/test/$package$/SparkTestHelper.scala

diff --git a/src/main/g8/src/test/$package$/$name__Camel$Test.scala b/src/main/g8/src/test/$package$/$name__Camel$Test.scala
index c0853f4..52b3700 100644
--- a/src/main/g8/src/test/$package$/$name__Camel$Test.scala
+++ b/src/main/g8/src/test/$package$/$name__Camel$Test.scala
@@ -1,9 +1,9 @@
 package $package$
 
-import org.scalatest.wordspec.AnyWordSpec
-import org.scalatest.matchers.should._
+import $package$.$name;format="Camel"$
+import org.apache.spark.sql.Row
 
-final class $name;format="Camel"$Test extends AnyWordSpec with Matchers {
+final class $name;format="Camel"$Test extends SparkTestHelper {
   "$name;format="Camel"$" should {
     "greet" in {
       val $name;format="camel"$ = new $name;format="Camel"$
@@ -11,7 +11,8 @@
       val nameToGreet = "Codely"
       val greeting = $name;format="camel"$.greet(nameToGreet)
 
-      greeting shouldBe "Hello " + nameToGreet
+      import testSQLImplicits._
+      Seq(greeting).toDF("greeting").collect() shouldBe Array(Row("Hello Codely"))
     }
   }
 }
diff --git a/src/main/g8/src/test/$package$/SparkTestHelper.scala b/src/main/g8/src/test/$package$/SparkTestHelper.scala
new file mode 100644
index 0000000..d056abe
--- /dev/null
+++ b/src/main/g8/src/test/$package$/SparkTestHelper.scala
@@ -0,0 +1,83 @@
+package $package$
+
+import org.apache.commons.io.FileUtils
+import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+import java.io.File
+import java.nio.file.Files
+import scala.reflect.io.Directory
+
+trait SparkTestHelper
+    extends AnyWordSpec
+    with BeforeAndAfterEach
+    with BeforeAndAfterAll
+    with Matchers {
+
+  private val sparkSession = SparkSession
+    .builder()
+    .master("local[*]")
+    .appName("test-spark-session")
+    .config(sparkConfiguration)
+    // .enableHiveSupport() uncomment this if you want to use Hive
+    .getOrCreate()
+
+  protected var tempDir: String = _
+
+  protected implicit def spark: SparkSession = sparkSession
+
+  protected def sc: SparkContext = sparkSession.sparkContext
+
+
+  protected def sparkConfiguration: SparkConf =
+    new SparkConf()
+  /* Uncomment this if you want to use Delta Lake
+
+      .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+      .set(
+        "spark.sql.catalog.spark_catalog",
+        "org.apache.spark.sql.delta.catalog.DeltaCatalog"
+      )
+   */
+
+  override protected def beforeAll(): Unit = {
+    super.beforeAll()
+    clearTemporaryDirectories()
+  }
+
+  override protected def beforeEach(): Unit = {
+    super.beforeEach()
+    tempDir = Files.createTempDirectory(this.getClass.toString).toString
+  }
+
+  override protected def afterAll(): Unit = {
+    super.afterAll()
+    sparkSession.stop()
+    SparkSession.clearActiveSession()
+    SparkSession.clearDefaultSession()
+    clearTemporaryDirectories()
+  }
+
+  override protected def afterEach(): Unit = {
+    super.afterEach()
+    new Directory(new File(tempDir)).deleteRecursively()
+    spark.sharedState.cacheManager.clearCache()
+    spark.sessionState.catalog.reset()
+  }
+
+  protected object testSQLImplicits extends SQLImplicits {
+    protected override def _sqlContext: SQLContext = sparkSession.sqlContext
+  }
+
+  private def clearTemporaryDirectories(): Unit = {
+    val warehousePath = new File("spark-warehouse").getAbsolutePath
+    FileUtils.deleteDirectory(new File(warehousePath))
+
+    val metastoreDbPath = new File("metastore_db").getAbsolutePath
+    FileUtils.deleteDirectory(new File(metastoreDbPath))
+  }
+
+}

From e86be0d402df8af157f7312083200c0c00882441 Mon Sep 17 00:00:00 2001
From: Christian Herrera
Date: Thu, 4 Jul 2024 13:45:26 +0200
Subject: [PATCH 3/4] Fix Java version in CI due to Spark incompatibility

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c8330c9..41b735a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
       - uses: actions/setup-java@v4
         with:
           distribution: 'zulu'
-          java-version: '21'
+          java-version: '11'
           cache: 'sbt'
       - name: 👌 Run "pre-push" tasks (compile and style-check)
         run: sbt prep
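
Note on the Java downgrade above: at the time of these patches Spark 3.x
officially supports Java 8, 11 and 17, so a Java 21 toolchain fails at
runtime; pinning CI to 11 is the conservative fix. If you would rather stay
on 17, the forked test JVM typically needs a set of --add-opens flags. A
hedged sketch for build.sbt (flag list abbreviated, not part of this series):

// build.sbt (sketch): run tests on JDK 17 by forking and opening the
// JDK modules Spark reflects into; extend the list as Spark requires.
Test / fork := true
Test / javaOptions ++= Seq(
  "--add-opens=java.base/java.lang=ALL-UNNAMED",
  "--add-opens=java.base/java.nio=ALL-UNNAMED",
  "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"
)
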
From 9959375f78c26ca807386ed8806c10214d52a4c1 Mon Sep 17 00:00:00 2001
From: Christian Herrera
Date: Fri, 5 Jul 2024 11:49:54 +0200
Subject: [PATCH 4/4] Remove unused code

---
 src/main/g8/src/test/$package$/$name__Camel$Test.scala | 1 -
 src/main/g8/src/test/$package$/SparkTestHelper.scala   | 2 --
 2 files changed, 3 deletions(-)

diff --git a/src/main/g8/src/test/$package$/$name__Camel$Test.scala b/src/main/g8/src/test/$package$/$name__Camel$Test.scala
index 52b3700..f0f79aa 100644
--- a/src/main/g8/src/test/$package$/$name__Camel$Test.scala
+++ b/src/main/g8/src/test/$package$/$name__Camel$Test.scala
@@ -1,6 +1,5 @@
 package $package$
 
-import $package$.$name;format="Camel"$
 import org.apache.spark.sql.Row
 
 final class $name;format="Camel"$Test extends SparkTestHelper {
diff --git a/src/main/g8/src/test/$package$/SparkTestHelper.scala b/src/main/g8/src/test/$package$/SparkTestHelper.scala
index d056abe..5423c56 100644
--- a/src/main/g8/src/test/$package$/SparkTestHelper.scala
+++ b/src/main/g8/src/test/$package$/SparkTestHelper.scala
@@ -31,7 +31,6 @@ trait SparkTestHelper
 
   protected def sc: SparkContext = sparkSession.sparkContext
 
-
   protected def sparkConfiguration: SparkConf =
     new SparkConf()
   /* Uncomment this if you want to use Delta Lake
@@ -79,5 +78,4 @@ trait SparkTestHelper
     val metastoreDbPath = new File("metastore_db").getAbsolutePath
     FileUtils.deleteDirectory(new File(metastoreDbPath))
   }
-
 }
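
Taken together, the series leaves the template with a reusable
SparkTestHelper. Two details worth noting for users of the generated project:
testSQLImplicits exists because `import spark.implicits._` requires a stable
identifier, which the trait's `spark` def is not; and each test gets a fresh
tempDir it can write to. A sketch of a rendered test exercising both
(hypothetical package and class names, assuming default template settings):

package com.example

import org.apache.spark.sql.Row

final class ParquetRoundTripTest extends SparkTestHelper {
  "SparkTestHelper" should {
    "round-trip a DataFrame through the per-test tempDir" in {
      import testSQLImplicits._ // brings toDF and friends into scope

      // Write a small DataFrame to the temporary directory created in beforeEach.
      val path = s"$tempDir/people"
      Seq(("Codely", 1), ("Swiftly", 2)).toDF("name", "id").write.parquet(path)

      // Read it back and check the contents survived the round trip.
      val roundTripped = spark.read.parquet(path)
      roundTripped.orderBy("id").collect() shouldBe Array(
        Row("Codely", 1),
        Row("Swiftly", 2)
      )
    }
  }
}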