From 8d0f490fcb9d4bfb3c2f3d33065a7d9654aa8ba6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sa=C5=A1a=20Zejnilovi=C4=87?=
Date: Thu, 18 Aug 2022 18:37:10 +0200
Subject: [PATCH] #28 Bump spark commons version

* Bump spark commons version

* Add provided dependencies docs to the readme
---
 README.md                                     | 18 ++++++--
 build.sbt                                     | 17 ++------
 project/Dependencies.scala                    | 43 +++++++++++++++++++
 .../types/TypedStructField.scala              |  7 ++-
 .../StandardizationInterpreterSuite.scala     |  2 +-
 ...tandardizationInterpreter_ArraySuite.scala |  2 +-
 6 files changed, 67 insertions(+), 22 deletions(-)
 create mode 100644 project/Dependencies.scala

diff --git a/README.md b/README.md
index d0e9beb..1ce2ed5 100644
--- a/README.md
+++ b/README.md
@@ -6,15 +6,27 @@
 - Dataframe in
 - Standardized Dataframe out
 
-## Dependency
-SBT:
+## Usage
+
+### Needed Provided Dependencies
+
+The library needs the following dependencies to be included in your project:
+
+```sbt
+"org.apache.spark" %% "spark-core" % SPARK_VERSION,
+"org.apache.spark" %% "spark-sql" % SPARK_VERSION,
+"za.co.absa" %% s"spark-commons-spark${SPARK_MAJOR}.${SPARK_MINOR}" % "0.3.1",
+```
+
+### Usage in SBT
 
 ```sbt
 "za.co.absa" %% "spark-data-standardization" % VERSION
 ```
 
+### Usage in Maven
+
 ### Scala 2.11
 [![Maven Central](https://maven-badges.herokuapp.com/maven-central/za.co.absa/spark-data-standardization_2.11/badge.svg)](https://maven-badges.herokuapp.com/maven-central/za.co.absa/spark-data-standardization_2.11)
-Maven
 ```xml
 <groupId>za.co.absa</groupId>
diff --git a/build.sbt b/build.sbt
index df6d4cc..d918973 100644
--- a/build.sbt
+++ b/build.sbt
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 import sys.process._
+import Dependencies._
 
 ThisBuild / name := "spark-data-standardization"
 ThisBuild / organization := "za.co.absa"
@@ -26,24 +28,13 @@ ThisBuild / scalaVersion := scala211
 
 ThisBuild / versionScheme := Some("early-semver")
 
-def sparkVersion(scalaVersion: String): String = if (scalaVersion==scala212) "3.2.1" else "2.4.7"
-
-def sparkFastTestsVersion(scalaVersion: String): String = if (scalaVersion == scala212) "1.1.0" else "0.23.0"
-
-libraryDependencies ++= List(
-  "org.apache.spark" %% "spark-core" % sparkVersion(scalaVersion.value) % "provided",
-  "org.apache.spark" %% "spark-sql" % sparkVersion(scalaVersion.value) % "provided",
-  "za.co.absa" %% "spark-commons" % "0.2.0",
-  "com.github.mrpowers" %% "spark-fast-tests" % sparkFastTestsVersion(scalaVersion.value) % Test,
-  "org.scalatest" %% "scalatest" % "3.2.2" % Test,
-  "com.typesafe" % "config" % "1.4.1"
-)
+libraryDependencies ++= dependencyList(scalaVersion.value)
 
 lazy val printSparkScalaVersion = taskKey[Unit]("Print Spark and Scala versions for standardization")
 ThisBuild / printSparkScalaVersion := {
   val log = streams.value.log
   val scalaVers = scalaVersion.value
-  log.info(s"Building with Spark ${sparkVersion(scalaVers)}, Scala ${scalaVers}")
+  log.info(s"Building with Spark ${getSparkVersion(scalaVers)}, Scala ${scalaVers}")
 }
 
 Test / parallelExecution := false
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
new file mode 100644
index 0000000..0902312
--- /dev/null
+++ b/project/Dependencies.scala
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2022 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import sbt._
+
+object Dependencies {
+  private def getSparkVersionUpToMinor(sparkVersion: String): String = {
+    val pattern = "([0-9]+)\\.([0-9]+)\\.([0-9]+)".r
+    val pattern(major, minor, patch) = sparkVersion
+    s"$major.$minor"
+  }
+
+  private def sparkFastTestsVersion(scalaVersion: String): String = if (scalaVersion.startsWith("2.11")) "0.23.0" else "1.1.0"
+
+  def getSparkVersion(scalaVersion: String): String = if (scalaVersion.startsWith("2.11")) "2.4.7" else "3.2.1"
+
+  def dependencyList(scalaVersion: String): Seq[ModuleID] = {
+    val sparkVersion = getSparkVersion(scalaVersion)
+    val sparkVersionUpToMinor = getSparkVersionUpToMinor(sparkVersion)
+    List(
+      "org.apache.spark" %% "spark-core" % sparkVersion % Provided,
+      "org.apache.spark" %% "spark-sql" % sparkVersion % Provided,
+      "za.co.absa" %% s"spark-commons-spark$sparkVersionUpToMinor" % "0.3.1" % Provided,
+      "za.co.absa" %% "spark-commons-test" % "0.3.1" % Test,
+      "com.typesafe" % "config" % "1.4.1",
+      "com.github.mrpowers" %% "spark-fast-tests" % sparkFastTestsVersion(scalaVersion) % Test,
+      "org.scalatest" %% "scalatest" % "3.2.2" % Test
+    )
+  }
+}
diff --git a/src/main/scala/za/co/absa/standardization/types/TypedStructField.scala b/src/main/scala/za/co/absa/standardization/types/TypedStructField.scala
index d46324f..ce8b18b 100644
--- a/src/main/scala/za/co/absa/standardization/types/TypedStructField.scala
+++ b/src/main/scala/za/co/absa/standardization/types/TypedStructField.scala
@@ -20,7 +20,7 @@ import java.sql.{Date, Timestamp}
 import java.util.Base64
 
 import org.apache.spark.sql.types._
-import za.co.absa.spark.commons.implicits.StructFieldImplicits.{StructFieldEnhancements, StructFieldMetadataEnhancements}
+import za.co.absa.spark.commons.implicits.StructFieldImplicits.StructFieldMetadataEnhancements
 import za.co.absa.standardization.ValidationIssue
 import za.co.absa.standardization.numeric.{DecimalSymbols, NumericPattern, Radix}
 import za.co.absa.standardization.schema.{MetadataKeys, MetadataValues}
@@ -28,11 +28,10 @@ import za.co.absa.standardization.time.DateTimePattern
 import za.co.absa.standardization.typeClasses.{DoubleLike, LongLike}
 import za.co.absa.standardization.types.parsers._
 import za.co.absa.standardization.validation.field._
-
 import scala.util.{Failure, Success, Try}
 
-sealed abstract class TypedStructField(structField: StructField)(implicit defaults: TypeDefaults)
-  extends StructFieldEnhancements(structField) with Serializable {
+sealed abstract class TypedStructField(val structField: StructField)(implicit defaults: TypeDefaults)
+  extends Serializable {
 
   type BaseType
 
diff --git a/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreterSuite.scala b/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreterSuite.scala
index f1bb46f..7dd611f 100644
--- a/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreterSuite.scala
+++ b/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreterSuite.scala
@@ -343,7 +343,7 @@ class StandardizationInterpreterSuite extends AnyFunSuite with SparkTestBase wit
 
     val srcString:String = FileReader.readFileAsString("src/test/resources/data/patients.json")
 
-    val src = JsonUtils.getDataFrameFromJson(spark, Seq(srcString))
+    val src = JsonUtils.getDataFrameFromJson(Seq(srcString))
 
     logDataFrameContent(src)
 
diff --git a/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala b/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala
index 1967353..eceb7bd 100644
--- a/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala
+++ b/src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala
@@ -208,7 +208,7 @@ class StandardizationInterpreter_ArraySuite extends AnyFunSuite with SparkTestBa
     val seq = Seq(
       s"""{"$fieldName": [["a", "bb", "ccc"],["1", "12"],["Hello", null, "World"]]}"""
     )
-    val src = JsonUtils.getDataFrameFromJson(spark, seq)
+    val src = JsonUtils.getDataFrameFromJson(seq)
 
     val subArrayJson = """{"type": "array", "elementType": "string", "containsNull": false}"""
     val desiredSchema = generateDesiredSchema(subArrayJson, s""""${MetadataKeys.DefaultValue}": "Nope"""")