From 2db9ca5c8476e174f733414b5c805d6ca38958c9 Mon Sep 17 00:00:00 2001 From: Sergei Lebedev Date: Wed, 21 Sep 2016 14:08:35 +0300 Subject: [PATCH] Added NPY/NPZ serialization closes #3 --- CHANGES | 1 + build.gradle | 1 + .../kotlin/org/jetbrains/bio/viktor/Loader.kt | 2 +- .../jetbrains/bio/viktor/NativeSpeedups.kt | 2 +- .../org/jetbrains/bio/viktor/Serialization.kt | 51 +++++++++++++++++++ .../jetbrains/bio/viktor/StridedMatrix2.kt | 9 ++-- .../jetbrains/bio/viktor/StridedMatrix3.kt | 9 ++-- .../org/jetbrains/bio/viktor/StridedVector.kt | 3 ++ .../bio/viktor/SerializationTests.kt | 50 ++++++++++++++++++ .../org/jetbrains/bio/viktor/TestSupport.kt | 20 ++++++++ 10 files changed, 140 insertions(+), 8 deletions(-) create mode 100644 src/main/kotlin/org/jetbrains/bio/viktor/Serialization.kt create mode 100644 src/test/kotlin/org/jetbrains/bio/viktor/SerializationTests.kt create mode 100644 src/test/kotlin/org/jetbrains/bio/viktor/TestSupport.kt diff --git a/CHANGES b/CHANGES index 4e01d8b..6e37c90 100644 --- a/CHANGES +++ b/CHANGES @@ -8,6 +8,7 @@ Version 0.3.4 - Fixed a bug in 'StridedMatrix2.toString', which incorrectly rendered large matrices, and unified the implementation with 'StridedMatrix3'. +- Added NumPy-compatible serialization support. 
Version 0.3.3 ------------- diff --git a/build.gradle b/build.gradle index efac67e..4cc8894 100644 --- a/build.gradle +++ b/build.gradle @@ -131,6 +131,7 @@ repositories { dependencies { compile 'org.apache.commons:commons-math3:3.6' + compile "org.jetbrains.bio:npy:0.3.+" compile "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version" testCompile "org.jetbrains.kotlin:kotlin-test:$kotlin_version" diff --git a/src/main/kotlin/org/jetbrains/bio/viktor/Loader.kt b/src/main/kotlin/org/jetbrains/bio/viktor/Loader.kt index c1233db..b6dcc2a 100644 --- a/src/main/kotlin/org/jetbrains/bio/viktor/Loader.kt +++ b/src/main/kotlin/org/jetbrains/bio/viktor/Loader.kt @@ -4,7 +4,7 @@ import java.nio.file.Files import java.nio.file.Path import java.nio.file.StandardCopyOption.REPLACE_EXISTING -class ResourceLibrary(private val name: String) { +internal class ResourceLibrary(private val name: String) { @Suppress("unchecked_cast") fun install() { val resource = System.mapLibraryName(name) diff --git a/src/main/kotlin/org/jetbrains/bio/viktor/NativeSpeedups.kt b/src/main/kotlin/org/jetbrains/bio/viktor/NativeSpeedups.kt index d539056..b2c6ad0 100644 --- a/src/main/kotlin/org/jetbrains/bio/viktor/NativeSpeedups.kt +++ b/src/main/kotlin/org/jetbrains/bio/viktor/NativeSpeedups.kt @@ -1,6 +1,6 @@ package org.jetbrains.bio.viktor -object NativeSpeedups { +internal object NativeSpeedups { init { Loader.ensureLoaded() } diff --git a/src/main/kotlin/org/jetbrains/bio/viktor/Serialization.kt b/src/main/kotlin/org/jetbrains/bio/viktor/Serialization.kt new file mode 100644 index 0000000..d8a7576 --- /dev/null +++ b/src/main/kotlin/org/jetbrains/bio/viktor/Serialization.kt @@ -0,0 +1,51 @@ +package org.jetbrains.bio.viktor + +import org.jetbrains.bio.npy.NpyArray +import org.jetbrains.bio.npy.NpyFile +import org.jetbrains.bio.npy.NpzFile +import java.nio.file.Path + +/** Returns a view of the [NpyArray] as a strided vector. 
*/ +fun NpyArray.asStridedVector() = asDoubleArray().asStrided() + +/** Returns a view of the [NpyArray] as a 2-D strided matrix. */ +fun NpyArray.asStridedMatrix2(): StridedMatrix2 { + val (numRows, numColumns) = shape + return asStridedVector().reshape(numRows, numColumns) +} + +/** Returns a view of the [NpyArray] as a 3-D strided matrix. */ +fun NpyArray.asStridedMatrix3(): StridedMatrix3 { + val (depth, numRows, numColumns) = shape + return asStridedVector().reshape(depth, numRows, numColumns) +} + +/** Writes a given vector to [path] in NPY format. */ +fun NpyFile.write(path: Path, v: StridedVector) { + write(path, v.toArray(), v.shape) +} + +/** Writes a given 2-D matrix to [path] in NPY format. */ +fun NpyFile.write(path: Path, m: StridedMatrix2) { + write(path, m.flatten().toArray(), shape = m.shape) +} + +/** Writes a given 3-D matrix to [path] in NPY format. */ +fun NpyFile.write(path: Path, m: StridedMatrix3) { + write(path, m.flatten().toArray(), m.shape) +} + +/** Writes a given vector into an NPZ file under the specified [name]. */ +fun NpzFile.Writer.write(name: String, v: StridedVector) { + write(name, v.toArray(), v.shape) +} + +/** Writes a given 2-D matrix into an NPZ file under the specified [name]. */ +fun NpzFile.Writer.write(name: String, m: StridedMatrix2) { + write(name, m.flatten().toArray(), m.shape) +} + +/** Writes a given 3-D matrix into an NPZ file under the specified [name]. 
*/ +fun NpzFile.Writer.write(name: String, m: StridedMatrix3) { + write(name, m.flatten().toArray(), m.shape) +} \ No newline at end of file diff --git a/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix2.kt b/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix2.kt index aaed0ff..0d8c87e 100644 --- a/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix2.kt +++ b/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix2.kt @@ -15,9 +15,12 @@ class StridedMatrix2 internal constructor( val rowStride: Int, val columnStride: Int) : FlatMatrixOps { - constructor(numRows: Int, numColumns: Int) : - // Use row-major order by default. - this(numRows, numColumns, DoubleArray(numRows * numColumns), 0, numColumns, 1) {} + constructor(numRows: Int, numColumns: Int, + data: DoubleArray = DoubleArray(numRows * numColumns)) : + this(numRows, numColumns, data, 0, numColumns, 1) {} + + /** Returns the shape of this matrix. */ + val shape: IntArray get() = intArrayOf(rowsNumber, columnsNumber) /** * Dense matrices are laid out in a single contiguous block diff --git a/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix3.kt b/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix3.kt index 6bf5a12..ea05e2b 100644 --- a/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix3.kt +++ b/src/main/kotlin/org/jetbrains/bio/viktor/StridedMatrix3.kt @@ -13,12 +13,15 @@ class StridedMatrix3 internal constructor( : FlatMatrixOps { - constructor(depth: Int, numRows: Int, numColumns: Int) : - this(depth, numRows, numColumns, - DoubleArray(depth * numRows * numColumns), + constructor(depth: Int, numRows: Int, numColumns: Int, + data: DoubleArray = DoubleArray(depth * numRows * numColumns)) : + this(depth, numRows, numColumns, data, 0, numRows * numColumns, numColumns, 1) { } + /** Returns the shape of this matrix. */ + val shape: IntArray get() = intArrayOf(depth, rowsNumber, columnsNumber) + /** * Dense matrices are laid out in a single contiguous block * of memory. 
diff --git a/src/main/kotlin/org/jetbrains/bio/viktor/StridedVector.kt b/src/main/kotlin/org/jetbrains/bio/viktor/StridedVector.kt index fef609b..01e113f 100644 --- a/src/main/kotlin/org/jetbrains/bio/viktor/StridedVector.kt +++ b/src/main/kotlin/org/jetbrains/bio/viktor/StridedVector.kt @@ -49,6 +49,9 @@ open class StridedVector internal constructor( val indices: IntRange get() = 0..size - 1 + /** Returns the shape of this vector. */ + val shape: IntArray get() = intArrayOf(size) + operator fun get(pos: Int): Double { try { return unsafeGet(pos) diff --git a/src/test/kotlin/org/jetbrains/bio/viktor/SerializationTests.kt b/src/test/kotlin/org/jetbrains/bio/viktor/SerializationTests.kt new file mode 100644 index 0000000..f798e6b --- /dev/null +++ b/src/test/kotlin/org/jetbrains/bio/viktor/SerializationTests.kt @@ -0,0 +1,50 @@ +package org.jetbrains.bio.viktor + +import org.jetbrains.bio.npy.NpyFile +import org.jetbrains.bio.npy.NpzFile +import org.junit.Test +import kotlin.test.assertEquals + +class TestReadWriteNpy { + @Test fun vector() = withTempFile("v", ".npy") { path -> + val v = StridedVector.of(1.0, 2.0, 3.0, 4.0) + NpyFile.write(path, v) + assertEquals(v, NpyFile.read(path).asStridedVector()) + } + + @Test fun matrix2() = withTempFile("m2", ".npy") { path -> + val m = StridedVector.of(1.0, 2.0, 3.0, 4.0, 5.0, 6.0).reshape(2, 3) + NpyFile.write(path, m) + assertEquals(m, NpyFile.read(path).asStridedMatrix2()) + } + + @Test fun matrix3() = withTempFile("m3", ".npy") { path -> + val m = StridedVector.of(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) + .reshape(1, 4, 2) + NpyFile.write(path, m) + assertEquals(m, NpyFile.read(path).asStridedMatrix3()) + } +} + +class TestReadWriteNpz { + @Test fun combined() { + val v = StridedVector.of(1.0, 2.0, 3.0, 4.0) + val m2 = StridedVector.of(1.0, 2.0, 3.0, 4.0, 5.0, 6.0).reshape(2, 3) + val m3 = StridedVector.of(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) + .reshape(1, 4, 2) + + withTempFile("vm2m3", ".npz") { path -> + 
NpzFile.write(path).use { + it.write("v", v) + it.write("m2", m2) + it.write("m3", m3) + } + + NpzFile.read(path).use { + assertEquals(v, it["v"].asStridedVector()) + assertEquals(m2, it["m2"].asStridedMatrix2()) + assertEquals(m3, it["m3"].asStridedMatrix3()) + } + } + } +} \ No newline at end of file diff --git a/src/test/kotlin/org/jetbrains/bio/viktor/TestSupport.kt b/src/test/kotlin/org/jetbrains/bio/viktor/TestSupport.kt new file mode 100644 index 0000000..0a1a222 --- /dev/null +++ b/src/test/kotlin/org/jetbrains/bio/viktor/TestSupport.kt @@ -0,0 +1,20 @@ +package org.jetbrains.bio.viktor + +import java.io.IOException +import java.nio.file.Files +import java.nio.file.Path + +internal inline fun withTempFile(prefix: String, suffix: String, + block: (Path) -> Unit) { + val path = Files.createTempFile(prefix, suffix) + try { + block(path) + } finally { + try { + Files.delete(path) + } catch (e: IOException) { + // Memory-mapped buffer not yet garbage collected. Leave it to the VM. + path.toFile().deleteOnExit() + } + } +}