Skip to content

Commit

Permalink
Merge pull request #115 from vitrivr/fix/export-nan
Browse files Browse the repository at this point in the history
Exporting entities with NaN as numeric values
  • Loading branch information
ppanopticon authored May 9, 2022
2 parents 0648bcd + 6eef791 commit a95714a
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ sealed class AbstractCottontailCommand(name: String, help: String, val expand: B
}
}
println("Executing and exporting query took $duration.")
} catch (e: StatusException) {
} catch (e: Throwable) {
print("A ${e::class.java.simpleName} occurred while executing and exporting query: ${e.message}.")
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class DumpEntityCommand(client: SimpleClient) : AbstractCottontailCommand.Entity
}
dataExporter.close()
}
println("Dumping ${entityName} took $duration.")
println("Dumping $entityName took $duration.")
} catch (e: Throwable) {
print("A ${e::class.java.simpleName} occurred while executing and exporting query: ${e.message}.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,10 @@ class JsonDataExporter(override val path: Path, val indent: String = "") : DataE
private set

/** The [JsonWriter] instance used to write the JSON file. */
private val writer = JsonWriter(
Files.newBufferedWriter(
this.path,
StandardOpenOption.CREATE_NEW,
StandardOpenOption.WRITE
)
)
private val writer = JsonWriter(Files.newBufferedWriter(this.path, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE))

init {
    val jsonWriter = this.writer

    /* Lenient mode: per Gson's JsonWriter documentation this permits non-finite numbers (NaN, infinities). */
    jsonWriter.isLenient = true
    jsonWriter.setIndent(this.indent)

    /* Opens the top-level JSON array that will hold the exported entries. */
    jsonWriter.beginArray()
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.vitrivr.cottontail.data.importer

import com.google.gson.GsonBuilder
import com.google.gson.stream.JsonReader
import com.google.gson.stream.JsonToken
import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap
Expand All @@ -24,9 +25,14 @@ import java.nio.file.Path
class JsonDataImporter(override val path: Path, override val schema: List<ColumnDef<*>>) : DataImporter {

/** The [JsonReader] instance used to read the JSON file. */
private val reader = JsonReader(Files.newBufferedReader(this.path))
private val reader = GsonBuilder()
.serializeNulls()
.serializeSpecialFloatingPointValues()
.create()
.newJsonReader(Files.newBufferedReader(this.path))

init {
    val jsonReader = this.reader

    /* Lenient mode: per Gson's JsonReader documentation this accepts non-finite numbers (NaN, infinities). */
    jsonReader.isLenient = true

    /* The file is expected to consist of a single top-level JSON array; step into it. */
    jsonReader.beginArray()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ import org.vitrivr.cottontail.cli.entity.TruncateEntityCommand
import org.vitrivr.cottontail.core.database.Name
import org.vitrivr.cottontail.data.Format
import org.vitrivr.cottontail.test.AbstractClientTest
import org.vitrivr.cottontail.test.GrpcTestUtils
import org.vitrivr.cottontail.test.GrpcTestUtils.countElements
import org.vitrivr.cottontail.test.TestConstants
import org.vitrivr.cottontail.test.TestConstants.TEST_ENTITY_NAME
import java.nio.file.Path
import kotlin.io.path.Path
import kotlin.io.path.deleteIfExists
Expand Down Expand Up @@ -39,13 +41,7 @@ class ExportImportCommandTest : AbstractClientTest() {
fun exportCreatesFile() {
formats.forEach { format ->
TestConstants.ALL_ENTITY_NAMES.forEach { name ->
val path = exportFolder()
path.toFile().mkdirs()
val exported = exportFile(format, name)
exported.deleteIfExists()
DumpEntityCommand.dumpEntity(name, path, format, this.client)
assert(exported.toFile().exists())
assert(exported.toFile().totalSpace > 1)
exportEntity(format, name)
}
}
}
Expand All @@ -67,4 +63,28 @@ class ExportImportCommandTest : AbstractClientTest() {
}
}
}

/**
 * Inserts an entry whose double value is NaN (plus one entry with default values) and checks, for every
 * format in [formats], that an export / truncate / import round trip preserves the number of entries.
 */
@Test
fun exportNan() {
    GrpcTestUtils.insertIntoTestEntity(client, double = Double.NaN)
    GrpcTestUtils.insertIntoTestEntity(client)
    for (format in formats) {
        /* Export first, then remember how many entries the entity held at that point. */
        exportEntity(format, TEST_ENTITY_NAME)
        val dumped = exportFile(format, TEST_ENTITY_NAME)
        val expected = countElements(this.client, TEST_ENTITY_NAME)

        /* Wipe the entity and restore it from the file that was just written. */
        TruncateEntityCommand.truncate(TEST_ENTITY_NAME, this.client, true)
        ImportDataCommand.importData(TEST_ENTITY_NAME, dumped, format, this.client, true)

        val actual = countElements(this.client, TEST_ENTITY_NAME)
        assert(expected == actual)
    }
}

/**
 * Dumps the entity [fqn] in the given [format] into the export folder and asserts that a
 * non-trivial export file was produced.
 *
 * @param format The [Format] to export the entity in.
 * @param fqn The [Name.EntityName] of the entity to export.
 */
private fun exportEntity(format: Format, fqn: Name.EntityName) {
    val path = exportFolder()
    path.toFile().mkdirs()

    /* Start from a clean slate so that the existence check below only reflects this dump. */
    val exported = exportFile(format, fqn)
    exported.deleteIfExists()

    DumpEntityCommand.dumpEntity(fqn, path, format, this.client)

    val file = exported.toFile()
    assert(file.exists())

    /*
     * File.totalSpace is the size of the partition that holds the path, not of the file itself,
     * so it cannot tell an empty export from a populated one. File.length() is the file size in bytes.
     */
    assert(file.length() > 1)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ import org.vitrivr.cottontail.client.SimpleClient
import org.vitrivr.cottontail.client.language.basics.Type
import org.vitrivr.cottontail.client.language.ddl.*
import org.vitrivr.cottontail.client.language.dml.BatchInsert
import org.vitrivr.cottontail.client.language.dml.Insert
import org.vitrivr.cottontail.client.language.dql.Query
import org.vitrivr.cottontail.core.database.Name
import org.vitrivr.cottontail.grpc.CottontailGrpc
import org.vitrivr.cottontail.test.TestConstants.DOUBLE_COLUMN_NAME
import org.vitrivr.cottontail.test.TestConstants.INT_COLUMN_NAME
import org.vitrivr.cottontail.test.TestConstants.STRING_COLUMN_NAME
import kotlin.random.Random


Expand Down Expand Up @@ -43,9 +47,9 @@ object GrpcTestUtils {
*/
fun createTestEntity(client: SimpleClient) {
val create = CreateEntity(TestConstants.TEST_ENTITY_NAME.fqn)
.column(TestConstants.STRING_COLUMN_NAME, Type.STRING)
.column(TestConstants.INT_COLUMN_NAME, Type.INTEGER)
.column(TestConstants.DOUBLE_COLUMN_NAME, Type.DOUBLE)
.column(STRING_COLUMN_NAME, Type.STRING)
.column(INT_COLUMN_NAME, Type.INTEGER)
.column(DOUBLE_COLUMN_NAME, Type.DOUBLE)
client.create(create)
}

Expand All @@ -56,9 +60,9 @@ object GrpcTestUtils {
*/
fun createTestVectorEntity(client: SimpleClient) {
val create = CreateEntity(TestConstants.TEST_VECTOR_ENTITY_NAME.fqn)
.column(TestConstants.STRING_COLUMN_NAME, Type.STRING)
.column(TestConstants.INT_COLUMN_NAME, Type.INTEGER)
.column(TestConstants.TWOD_COLUMN_NAME, Type.FLOAT_VECTOR, 2)
.column(STRING_COLUMN_NAME, Type.STRING)
.column(INT_COLUMN_NAME, Type.INTEGER)
.column(TestConstants.TWOD_COLUMN_NAME, Type.FLOAT_VECTOR, 2)
client.create(create)
}

Expand All @@ -68,7 +72,7 @@ object GrpcTestUtils {
* @param client [SimpleClient] to use.
*/
fun populateTestEntity(client: SimpleClient) {
val batch = BatchInsert().into(TestConstants.TEST_ENTITY_NAME.fqn).columns(TestConstants.STRING_COLUMN_NAME, TestConstants.INT_COLUMN_NAME, TestConstants.DOUBLE_COLUMN_NAME)
val batch = BatchInsert().into(TestConstants.TEST_ENTITY_NAME.fqn).columns(STRING_COLUMN_NAME, INT_COLUMN_NAME, DOUBLE_COLUMN_NAME)
val random = Random.Default
repeat(TestConstants.TEST_COLLECTION_SIZE) {
batch.append(
Expand All @@ -80,11 +84,16 @@ object GrpcTestUtils {
client.insert(batch)
}

/**
 * Inserts a single entry into the [TestConstants.TEST_ENTITY_NAME] entity.
 *
 * @param client [SimpleClient] to use.
 * @param string Value for the [STRING_COLUMN_NAME] column; defaults to `RandomStringUtils.randomAlphabetic(5)`.
 * @param int Value for the [INT_COLUMN_NAME] column; defaults to `Random.nextInt(0, 100)`.
 * @param double Value for the [DOUBLE_COLUMN_NAME] column; defaults to `Random.nextDouble(1.0)`.
 */
fun insertIntoTestEntity(
    client: SimpleClient,
    string: String = RandomStringUtils.randomAlphabetic(5),
    int: Int = Random.nextInt(0, 100),
    double: Double = Random.nextDouble(1.0)
) {
    val entry = Insert()
        .into(TestConstants.TEST_ENTITY_NAME.fqn)
        .values(STRING_COLUMN_NAME to string, INT_COLUMN_NAME to int, DOUBLE_COLUMN_NAME to double)
    client.insert(entry)
}

/**
* Creates a Lucene index on the [TestConstants.TEST_ENTITY_NAME].
*/
fun createLuceneIndexOnTestEntity(client: SimpleClient) {
client.create(CreateIndex(TestConstants.TEST_ENTITY_NAME.fqn, TestConstants.STRING_COLUMN_NAME, CottontailGrpc.IndexType.LUCENE))
client.create(CreateIndex(TestConstants.TEST_ENTITY_NAME.fqn, STRING_COLUMN_NAME, CottontailGrpc.IndexType.LUCENE))
client.optimize(OptimizeEntity(TestConstants.TEST_ENTITY_NAME.fqn))
}

Expand All @@ -95,16 +104,16 @@ object GrpcTestUtils {
*/
fun populateVectorEntity(client: SimpleClient) {
val batch = BatchInsert().into(TestConstants.TEST_VECTOR_ENTITY_NAME.fqn)
.columns(TestConstants.STRING_COLUMN_NAME, TestConstants.INT_COLUMN_NAME, TestConstants.TWOD_COLUMN_NAME)
.columns(STRING_COLUMN_NAME, INT_COLUMN_NAME, TestConstants.TWOD_COLUMN_NAME)
val random = Random.Default
repeat(TestConstants.TEST_COLLECTION_SIZE) {
val lat = random.nextFloat() + random.nextInt(0, 50)
val lon = random.nextFloat() + random.nextInt(0, 50)
val arr = floatArrayOf(lat, lon)
batch.append(
RandomStringUtils.randomAlphabetic(5),
random.nextInt(0, 10),
arr
RandomStringUtils.randomAlphabetic(5),
random.nextInt(0, 10),
arr
)
}
client.insert(batch)
Expand Down

0 comments on commit a95714a

Please sign in to comment.