diff --git a/framework/arcane-framework/.idea/.gitignore b/framework/arcane-framework/.idea/.gitignore deleted file mode 100644 index 13566b8..0000000 --- a/framework/arcane-framework/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/framework/arcane-framework/.idea/codeStyles/Project.xml b/framework/arcane-framework/.idea/codeStyles/Project.xml deleted file mode 100644 index 919ce1f..0000000 --- a/framework/arcane-framework/.idea/codeStyles/Project.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/codeStyles/codeStyleConfig.xml b/framework/arcane-framework/.idea/codeStyles/codeStyleConfig.xml deleted file mode 100644 index a55e7a1..0000000 --- a/framework/arcane-framework/.idea/codeStyles/codeStyleConfig.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/libraries/sbt__org_scala_lang_scala3_library_3_3_6_1_jar.xml b/framework/arcane-framework/.idea/libraries/sbt__org_scala_lang_scala3_library_3_3_6_1_jar.xml deleted file mode 100644 index 46a7979..0000000 --- a/framework/arcane-framework/.idea/libraries/sbt__org_scala_lang_scala3_library_3_3_6_1_jar.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_15_jar.xml b/framework/arcane-framework/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_15_jar.xml deleted file mode 100644 index ec1f1b8..0000000 --- a/framework/arcane-framework/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_15_jar.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/libraries/sbt__scala_sdk_3_6_1.xml 
b/framework/arcane-framework/.idea/libraries/sbt__scala_sdk_3_6_1.xml deleted file mode 100644 index 48bdd89..0000000 --- a/framework/arcane-framework/.idea/libraries/sbt__scala_sdk_3_6_1.xml +++ /dev/null @@ -1,66 +0,0 @@ - - - - Scala_3_6 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - file://$USER_HOME$/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-sbt-bridge/3.6.1/scala3-sbt-bridge-3.6.1.jar - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/misc.xml b/framework/arcane-framework/.idea/misc.xml deleted file mode 100644 index f03c948..0000000 --- a/framework/arcane-framework/.idea/misc.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/modules.xml b/framework/arcane-framework/.idea/modules.xml deleted file mode 100644 index 6a5dca2..0000000 --- a/framework/arcane-framework/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/modules/arcane-framework.arcane-framework-build.iml b/framework/arcane-framework/.idea/modules/arcane-framework.arcane-framework-build.iml deleted file mode 100644 index 80f1d3a..0000000 --- a/framework/arcane-framework/.idea/modules/arcane-framework.arcane-framework-build.iml +++ /dev/null @@ -1,198 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/modules/arcane-framework.iml b/framework/arcane-framework/.idea/modules/arcane-framework.iml 
deleted file mode 100644 index 4fb6da2..0000000 --- a/framework/arcane-framework/.idea/modules/arcane-framework.iml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/scala_compiler.xml b/framework/arcane-framework/.idea/scala_compiler.xml deleted file mode 100644 index fdff03a..0000000 --- a/framework/arcane-framework/.idea/scala_compiler.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/scala_settings.xml b/framework/arcane-framework/.idea/scala_settings.xml deleted file mode 100644 index 1b970c7..0000000 --- a/framework/arcane-framework/.idea/scala_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/.idea/vcs.xml b/framework/arcane-framework/.idea/vcs.xml deleted file mode 100644 index b2bdec2..0000000 --- a/framework/arcane-framework/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/framework/arcane-framework/build.sbt b/framework/arcane-framework/build.sbt index 36b4158..924c852 100644 --- a/framework/arcane-framework/build.sbt +++ b/framework/arcane-framework/build.sbt @@ -5,5 +5,20 @@ ThisBuild / scalaVersion := "3.6.1" lazy val root = (project in file(".")) .settings( name := "arcane-framework", - idePackagePrefix := Some("com.sneaksanddata.arcane.framework") + idePackagePrefix := Some("com.sneaksanddata.arcane.framework"), + + // Compiler options + Test / logBuffered := false, + + // Framework dependencies + libraryDependencies += "io.delta" % "delta-kernel-api" % "4.0.0rc1", + libraryDependencies += "dev.zio" %% "zio" % "2.1.6", + libraryDependencies += "dev.zio" %% "zio-streams" % "2.1.6", + libraryDependencies += "com.microsoft.sqlserver" % "mssql-jdbc" % "12.8.1.jre11", + libraryDependencies += "software.amazon.awssdk" % "s3" % "2.25.27", + + // Test dependencies + 
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.19" % Test, + libraryDependencies += "org.scalatest" %% "scalatest-flatspec" % "3.2.19" % Test + ) diff --git a/framework/arcane-framework/src/main/scala/services/storage/base/BlobStorageReader.scala b/framework/arcane-framework/src/main/scala/services/storage/base/BlobStorageReader.scala new file mode 100644 index 0000000..02bbdff --- /dev/null +++ b/framework/arcane-framework/src/main/scala/services/storage/base/BlobStorageReader.scala @@ -0,0 +1,22 @@ +package com.sneaksanddata.arcane.framework +package services.storage.base + +import services.storage.models.base.BlobPath + +import scala.concurrent.Future + +/** + * Defines the interface for reading blobs from a blob storage. + * + * @tparam PathType The type of the path to the blob; must be a [[BlobPath]]. + */ +trait BlobStorageReader[PathType <: BlobPath]: + /** + * Gets the content of the blob at the given path. + * + * @param blobPath The path to the blob. + * @param deserializer Function that converts the raw bytes of the blob into a `Result`. + * @tparam Result The type produced by `deserializer`. + * @return A future holding the result of applying `deserializer` to the content of the blob. + */ + def getBlobContent[Result](blobPath: PathType, deserializer: Array[Byte] => Result): Future[Result] diff --git a/framework/arcane-framework/src/main/scala/services/storage/base/BlobStorageWriter.scala b/framework/arcane-framework/src/main/scala/services/storage/base/BlobStorageWriter.scala new file mode 100644 index 0000000..80d15a2 --- /dev/null +++ b/framework/arcane-framework/src/main/scala/services/storage/base/BlobStorageWriter.scala @@ -0,0 +1,48 @@ +package com.sneaksanddata.arcane.framework +package services.storage.base + +import services.storage.models.base.BlobPath + +import java.net.URL +import scala.concurrent.Future + +/** + * Defines the interface for writing to a blob storage. + * @tparam Path The type of the path to the blob; must be a [[BlobPath]]. + * @tparam Result The type of the value yielded by the save and remove operations. 
+ */ +trait BlobStorageWriter[Path <: BlobPath, Result]: + /** + * Saves the given bytes as a blob. + * + * @param blobPath The path to the blob. + * @param data The bytes to save. + * @return A future holding the result of the upload. + */ + def saveBytesAsBlob(blobPath: Path, data: Array[Byte]): Future[Result] + + /** + * Saves the given text as a blob. + * + * @param blobPath The path to the blob. + * @param data The text to save. + * @return A future holding the result of the upload. + */ + def saveTextAsBlob(blobPath: Path, data: String): Future[Result] + + /** + * Removes the blob at the given path. + * @param blobPath The path to the blob. + * @param data NOTE(review): role unclear for a removal; confirm what implementers expect here. + * @return A future holding the result of the removal. + */ + def removeBlob(blobPath: Path, data: String): Future[Result] + + /** + * Gets the location of the blob at the given path as a URL. + * + * @param blobPath The path to the blob. + * @param data NOTE(review): role unclear for a URL lookup; confirm what implementers expect here. + * @return A future holding the URL of the blob. + */ + def getBlobUri(blobPath: Path, data: String): Future[URL] diff --git a/framework/arcane-framework/src/main/scala/services/storage/models/amazon/AmazonS3StoragePath.scala b/framework/arcane-framework/src/main/scala/services/storage/models/amazon/AmazonS3StoragePath.scala new file mode 100644 index 0000000..a195d90 --- /dev/null +++ b/framework/arcane-framework/src/main/scala/services/storage/models/amazon/AmazonS3StoragePath.scala @@ -0,0 +1,53 @@ +package com.sneaksanddata.arcane.framework +package services.storage.models.amazon + +import services.storage.models.base.BlobPath + +import scala.annotation.targetName +import scala.util.matching.Regex +import scala.util.{Failure, Success, Try} + +/** + * Represents a path to a blob in Amazon S3 storage. + * + * @param bucket The name of the bucket. + * @param objectKey The key of the object in the bucket. + */ +final case class AmazonS3StoragePath(bucket: String, objectKey: String) extends BlobPath: + + /** + * Renders the path as an HDFS-style `s3a://` path. + * + * @return The path in the form `s3a://bucket/objectKey`. 
+ */ + override def toHdfsPath: String = s"s3a://$bucket/$objectKey" + + /** + * Joins the given key name to the current path. + * + * @param keyName The key name to join; used as-is when the current object key is empty, otherwise appended after a `/`. + * @return A new path in the same bucket; this instance is left unchanged. + */ + @targetName("plus") + def +(keyName: String): AmazonS3StoragePath = new AmazonS3StoragePath(bucket, if (objectKey.isEmpty) keyName else s"$objectKey/$keyName") + +/** + * Companion object for [[AmazonS3StoragePath]]. + */ +object AmazonS3StoragePath { + private val matchRegex: String = "^s3a://([^/]+)/?(.*)$" + + /** + * Creates an [[AmazonS3StoragePath]] from the given HDFS path; the whole input must match, so text before `s3a://` is rejected. + * + * @param hdfsPath The HDFS path, expected in the form `s3a://bucket/path`. + * @return `Success` with the parsed path (a trailing `/` on the key is dropped), or `Failure` with an `IllegalArgumentException` when the input does not match. + */ + def apply(hdfsPath: String): Try[AmazonS3StoragePath] = + val r: Regex = AmazonS3StoragePath.matchRegex.r + val m = r.findFirstMatchIn(hdfsPath) + m match { + case Some(matched) => Success(new AmazonS3StoragePath(matched.group(1), matched.group(2).stripSuffix("/"))) + case None => Failure(IllegalArgumentException(s"An AmazonS3StoragePath must be in the format s3a://bucket/path, but was: $hdfsPath")) + } +} \ No newline at end of file diff --git a/framework/arcane-framework/src/main/scala/services/storage/models/base/BlobPath.scala b/framework/arcane-framework/src/main/scala/services/storage/models/base/BlobPath.scala new file mode 100644 index 0000000..090b340 --- /dev/null +++ b/framework/arcane-framework/src/main/scala/services/storage/models/base/BlobPath.scala @@ -0,0 +1,14 @@ +package com.sneaksanddata.arcane.framework +package services.storage.models.base + +/** + * Represents a path to a blob. + */ +trait BlobPath { + /** + * Converts the path to an HDFS-style path. + * + * @return The path as a string. 
+ */ + def toHdfsPath: String +} diff --git a/framework/arcane-framework/src/test/scala/services/storage/models/amazon/AmazonS3StoragePathTests.scala b/framework/arcane-framework/src/test/scala/services/storage/models/amazon/AmazonS3StoragePathTests.scala new file mode 100644 index 0000000..bdd146d --- /dev/null +++ b/framework/arcane-framework/src/test/scala/services/storage/models/amazon/AmazonS3StoragePathTests.scala @@ -0,0 +1,65 @@ +package com.sneaksanddata.arcane.framework +package services.storage.models.amazon + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers +import org.scalatest.prop.TableDrivenPropertyChecks.forAll +import org.scalatest.prop.Tables.Table +import org.scalatest.matchers.should.Matchers._ +import scala.util.{Failure, Success, Try} + +//noinspection ScalaUnusedExpression +class AmazonS3StoragePathTests extends AnyFlatSpec with Matchers { + + "AmazonS3StoragePath" should "be able to parse correct path" in { + val path = "s3a://bucket/key" + val parsed = AmazonS3StoragePath(path) + + parsed should be (Success(AmazonS3StoragePath("bucket", "key"))) + } + + it should "have stable serialization and deserialization" in { + val path = "s3a://bucket/key" + val parsed = AmazonS3StoragePath(path) + val serialized = AmazonS3StoragePath(parsed.get.toHdfsPath) + + parsed should be (Success(AmazonS3StoragePath(serialized.get.bucket, serialized.get.objectKey))) + } + + it should "serialize and deserialize s3 paths in the same way" in { + val path = "s3a://bucket/key" + val parsed = AmazonS3StoragePath(path) + val serialized = AmazonS3StoragePath(parsed.get.toHdfsPath) + + parsed.get.toHdfsPath should be (serialized.get.toHdfsPath) + } + + it should "be able to join paths" in { + val path = "s3a://bucket/key" + val parsed = AmazonS3StoragePath(path) + val parsedJoined = parsed.get + "key2" + + parsedJoined should be (AmazonS3StoragePath("bucket", "key/key2")) + } + + + private val joinPathCases = Table( + // First 
tuple defines column names + ("original", "joined", "expected"), + + // Subsequent tuples define the data: path to parse, key to join, expected HDFS path + ("s3a://bucket-name/", "/folder1///folder2/file.txt", "s3a://bucket-name//folder1///folder2/file.txt"), + ("s3a://bucket-name/", "folder1///folder2/file.txt", "s3a://bucket-name/folder1///folder2/file.txt"), + ("s3a://bucket-name", "folder1///folder2/file.txt", "s3a://bucket-name/folder1///folder2/file.txt"), + ("s3a://bucket-name", "/folder1///folder2/file.txt", "s3a://bucket-name//folder1///folder2/file.txt") + ) + + forAll (joinPathCases) { (orig: String, rest: String, expectedResult: String ) => + it should s"be able to remove extra slashes with values ($orig, $rest, $expectedResult)" in { + val parsed = AmazonS3StoragePath(orig) + val result = parsed.get + rest + + result.toHdfsPath should be (expectedResult) + } + } +} \ No newline at end of file