From 2b4e098f715a318075d20a200b7d747eca00e259 Mon Sep 17 00:00:00 2001
From: Marc LAMY
Date: Wed, 10 Apr 2024 19:43:10 +0200
Subject: [PATCH] update the documentation

---
 docs/content/advanced/advanced.md           |   2 +-
 docs/content/configuration/configuration.md |   2 +-
 docs/content/getting-started.md             |   4 +-
 docs/content/tests.md                       | 107 ++++++++++++++++++++
 4 files changed, 111 insertions(+), 4 deletions(-)
 create mode 100644 docs/content/tests.md

diff --git a/docs/content/advanced/advanced.md b/docs/content/advanced/advanced.md
index 598d55e..0aebf24 100644
--- a/docs/content/advanced/advanced.md
+++ b/docs/content/advanced/advanced.md
@@ -2,6 +2,6 @@
 title: Advanced
 layout: default
 has_children: true
-nav_order: 6
+nav_order: 7
 ---
 # Advanced
diff --git a/docs/content/configuration/configuration.md b/docs/content/configuration/configuration.md
index f95e33c..0dae97f 100644
--- a/docs/content/configuration/configuration.md
+++ b/docs/content/configuration/configuration.md
@@ -2,7 +2,7 @@
 title: Configuration
 layout: default
 has_children: true
-nav_order: 5
+nav_order: 6
 ---
 # Configuration
 
diff --git a/docs/content/getting-started.md b/docs/content/getting-started.md
index 653a407..75bad1e 100644
--- a/docs/content/getting-started.md
+++ b/docs/content/getting-started.md
@@ -17,7 +17,7 @@ nav_order: 2
 
 ## Installation
 
-Data I/O was built with Spark 3.3.2 and Scala 2.12. Support for prior versions is not guaranteed.
+Data I/O was built and tested with Spark 3.2.1/3.3.2/3.4.1 and Scala 2.12. Support for prior versions is not guaranteed.
 {: .warning}
 
 Published releases are available on GitHub Packages, in the AmadeusITGroup repository.
@@ -27,7 +27,7 @@ Using Maven:
 ```xml
 <dependency>
     <groupId>com.amadeus.dataio</groupId>
-    <artifactId>dataio-framework</artifactId>
+    <artifactId>dataio-core</artifactId>
     <version>x.x.x</version>
 </dependency>
 ```
diff --git a/docs/content/tests.md b/docs/content/tests.md
new file mode 100644
index 0000000..44ecf8c
--- /dev/null
+++ b/docs/content/tests.md
@@ -0,0 +1,107 @@
+---
+title: Writing tests
+layout: default
+nav_order: 5
+---
+# Writing tests
+<details open markdown="block">
+  <summary>
+    Table of contents
+  </summary>
+  {: .text-delta }
+1. TOC
+{:toc}
+</details>
+
+---
+
+Data I/O offers a separate library with utility traits and methods designed to facilitate testing Scala/Spark SQL applications.
+
+## Installation
+
+Published releases are available on GitHub Packages, in the AmadeusITGroup repository.
+
+Using Maven:
+
+```xml
+<dependency>
+    <groupId>com.amadeus.dataio</groupId>
+    <artifactId>dataio-test</artifactId>
+    <version>x.x.x</version>
+</dependency>
+```
+
+## Overview
+
+### Interacting with the file system
+The `FileSystemSpec` trait provides the Hadoop `LocalFileSystem` for tests needing direct access to an instance of `FileSystem`.
+
+Example:
+
+```scala
+import com.amadeus.dataio.test._
+import org.apache.hadoop.fs.Path
+import org.scalatest.flatspec.AnyFlatSpec
+
+class MyAppTest extends AnyFlatSpec with FileSystemSpec {
+  "MyAppTest" should "do something" in {
+    assert(fs.exists(new Path("file:///my_file.txt")))
+  }
+}
+```
+
+### Interacting with a SparkSession
+The `SparkSpec` trait provides a local Spark session and helper functions for Spark tests:
+- `getTestName: String`: Returns the name of the test suite.
+- `collectData(path: String, format: String, schema: Option[String] = None): Array[String]`: Collects data from the file system.
+
+Note that when extending this trait, you must override the `getTestName: String` function.
+
+Example:
+
+```scala
+import com.amadeus.dataio.test._
+import org.scalatest.flatspec.AnyFlatSpec
+
+class MyAppTest extends AnyFlatSpec with SparkSpec {
+  override def getTestName = "MyAppTest"
+
+  "MyAppTest" should "do something" in {
+    spark.read.format("csv").load("my_data.csv")
+
+    collectData("my_data.csv", "csv")
+  }
+}
+```
+
+### Interacting with a Streaming context
+The `SparkStreamingSpec` trait provides a local Spark session and helper functions for Spark Streaming tests:
+- `enableSparkStreamingSchemaInference(): Unit`: Enables Spark streaming schema inference.
+- `collectDataStream(dataFrame: DataFrame): Array[String]`: Collects data from a DataFrame read from a stream, using an in-memory sink.
+
+A sketch of a complete streaming test is shown at the end of this page.
+
+### Implicitly converting Scala Maps and Lists to their Java equivalents
+It is sometimes necessary to build complex map structures when creating Typesafe `Config` objects, which requires verbose Scala-to-Java conversions.
+
+To simplify this, you may extend the `JavaImplicitConverters` trait.
+
+Example:
+
+```scala
+import com.amadeus.dataio.test._
+import com.typesafe.config.ConfigFactory
+import org.scalatest.flatspec.AnyFlatSpec
+
+class MyAppTest extends AnyFlatSpec with JavaImplicitConverters {
+  "MyAppTest" should "do something" in {
+    ConfigFactory.parseMap(
+      Map("NodeName" -> Seq(Map("Type" -> "com.Entity"), Map("Type" -> "com.Entity")))
+    )
+  }
+}
+```
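+
+### A complete streaming example
+Below is a minimal sketch of a streaming test using the `SparkStreamingSpec` trait described above. It assumes that `SparkStreamingSpec`, like `SparkSpec`, exposes the `spark` session and requires overriding `getTestName`; the `MyStreamingAppTest` name and the `my_data_dir/` path are purely illustrative.
+
+```scala
+import com.amadeus.dataio.test._
+import org.scalatest.flatspec.AnyFlatSpec
+
+// Assumes SparkStreamingSpec provides `spark` and `getTestName`, like SparkSpec.
+class MyStreamingAppTest extends AnyFlatSpec with SparkStreamingSpec {
+  override def getTestName = "MyStreamingAppTest"
+
+  "MyStreamingAppTest" should "collect rows from a stream" in {
+    // Let Spark infer the schema of streaming sources, so the CSV stream
+    // below can be read without declaring a schema explicitly.
+    enableSparkStreamingSchemaInference()
+
+    // Read a directory of CSV files as a streaming DataFrame.
+    val streamingDf = spark.readStream.format("csv").load("my_data_dir/")
+
+    // Drain the stream into an in-memory sink and collect the results.
+    val rows = collectDataStream(streamingDf)
+
+    assert(rows.nonEmpty)
+  }
+}
+```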