Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DuckDBAppender: Add enum support #210

Merged
merged 10 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ public static class LogicalType
/// <summary>
/// P/Invoke binding for the native <c>duckdb_enum_internal_type</c> entry point (cdecl).
/// </summary>
/// <param name="type">Logical type handle to query. NOTE(review): presumably must describe an ENUM type - confirm against the DuckDB C API.</param>
/// <returns>The <see cref="DuckDBType"/> the native library reports for <paramref name="type"/>.</returns>
[DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_enum_internal_type")]
public static extern DuckDBType DuckDBEnumInternalType(DuckDBLogicalType type);

/// <summary>
/// P/Invoke binding for the native <c>duckdb_enum_dictionary_size</c> entry point (cdecl).
/// </summary>
/// <param name="type">Logical type handle to query. NOTE(review): presumably must describe an ENUM type - confirm against the DuckDB C API.</param>
/// <returns>The dictionary size reported by the native library, as an unsigned 32-bit integer.</returns>
[DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_enum_dictionary_size")]
public static extern uint DuckDBEnumDictionarySize(DuckDBLogicalType type);

/// <summary>
/// P/Invoke binding for the native <c>duckdb_enum_dictionary_value</c> entry point (cdecl).
/// </summary>
/// <param name="type">Logical type handle to query. NOTE(review): presumably must describe an ENUM type - confirm against the DuckDB C API.</param>
/// <param name="index">Position of the dictionary entry to read. NOTE(review): the native parameter is believed to be an unsigned 64-bit index - confirm that <c>long</c> is the intended managed type.</param>
/// <returns>
/// A raw pointer to the native string for the requested entry.
/// NOTE(review): ownership of the returned memory is not visible here - confirm who is responsible for freeing it.
/// </returns>
[DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_enum_dictionary_value")]
public static extern IntPtr DuckDBEnumDictionaryValue(DuckDBLogicalType type, long index);

Expand Down
6 changes: 6 additions & 0 deletions DuckDB.NET.Data/DuckDBAppenderRow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ public void EndRow()

#endregion

#region Append Enum

/// <summary>
/// Appends a CLR enum value to the current row.
/// </summary>
/// <typeparam name="TEnum">Enum type of the value being appended.</typeparam>
/// <param name="value">Enum value to append.</param>
/// <returns>The result of the shared internal append routine.</returns>
public DuckDBAppenderRow AppendValue<TEnum>(TEnum value) where TEnum : Enum
{
    return AppendValueInternal(value);
}

#endregion

#region Append Float

/// <summary>
/// Appends a nullable single-precision floating point value to the current row.
/// </summary>
/// <param name="value">Value to append.</param>
/// <returns>The result of the shared internal append routine.</returns>
public DuckDBAppenderRow AppendValue(float? value)
{
    return AppendValueInternal(value);
}
Expand Down
93 changes: 93 additions & 0 deletions DuckDB.NET.Data/Internal/Writer/EnumVectorDataWriter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Threading;
using DuckDB.NET.Native;

namespace DuckDB.NET.Data.Internal.Writer;

internal sealed unsafe class EnumVectorDataWriter : VectorDataWriterBase
{
private readonly DuckDBType enumType;

private readonly uint enumDictionarySize;

private readonly Dictionary<string, uint> enumValues;

/// <summary>
/// Creates a writer for an ENUM column and caches the column's enum dictionary
/// (label -> zero-based dictionary index) so that later appends need no native calls.
/// </summary>
/// <param name="vector">Native vector handle, forwarded to the base writer.</param>
/// <param name="vectorData">Pointer to the vector's data buffer, forwarded to the base writer.</param>
/// <param name="logicalType">Logical type of the ENUM column; queried for its internal type and dictionary.</param>
/// <param name="columnType">Column type, forwarded to the base writer.</param>
/// <exception cref="NotSupportedException">The internal enum type is not utinyint, usmallint, or uinteger.</exception>
/// <exception cref="InvalidOperationException">The reported dictionary size does not fit the reported internal type.</exception>
public EnumVectorDataWriter(IntPtr vector, void* vectorData, DuckDBLogicalType logicalType, DuckDBType columnType) : base(vector, vectorData, columnType)
{
    enumType = NativeMethods.LogicalType.DuckDBEnumInternalType(logicalType);
    enumDictionarySize = NativeMethods.LogicalType.DuckDBEnumDictionarySize(logicalType);

    // Largest dictionary size that is accepted for each internal storage type.
    uint maxEnumDictionarySize = enumType switch
    {
        DuckDBType.UnsignedTinyInt => byte.MaxValue,
        DuckDBType.UnsignedSmallInt => ushort.MaxValue,
        DuckDBType.UnsignedInteger => uint.MaxValue,
        _ => throw new NotSupportedException("The internal enum type must be utinyint, usmallint, or uinteger."),
    };
    if (enumDictionarySize > maxEnumDictionarySize)
    {
        // This exception should only be thrown if the DuckDB library has a bug.
        throw new InvalidOperationException($"The internal enum type is \"{enumType}\" but the enum dictionary size is greater than {maxEnumDictionarySize}.");
    }

    // The number of entries is known up front, so size the dictionary once instead of
    // letting it grow and rehash repeatedly. The capacity parameter is an int, hence the clamp.
    int initialCapacity = enumDictionarySize > int.MaxValue ? int.MaxValue : (int)enumDictionarySize;
    enumValues = new Dictionary<string, uint>(initialCapacity);
    for (uint index = 0; index < enumDictionarySize; index++)
    {
        string enumValueName = NativeMethods.LogicalType.DuckDBEnumDictionaryValue(logicalType, index).ToManagedString();
        enumValues.Add(enumValueName, index);
    }
}

/// <summary>
/// Writes an enum label into the vector by looking up its dictionary index and
/// storing that index using the enum's internal unsigned integer type.
/// </summary>
/// <param name="value">Enum label to write; must be a key of the cached enum dictionary.</param>
/// <param name="rowIndex">Row of the vector to write to.</param>
/// <returns>The result of the underlying typed append.</returns>
/// <exception cref="InvalidOperationException">
/// The label is not part of the enum dictionary, or the internal enum type is not one of the supported unsigned types.
/// </exception>
internal override bool AppendString(string value, int rowIndex)
{
    // Guard: unknown labels are rejected before anything is written.
    if (!enumValues.TryGetValue(value, out uint dictionaryIndex))
    {
        throw new InvalidOperationException($"Failed to write Enum column because the value \"{value}\" is not valid.");
    }

    // Narrowing below cannot lose data: the constructor verified that the dictionary size fits the internal type.
    switch (enumType)
    {
        case DuckDBType.UnsignedTinyInt:
            return AppendValueInternal((byte)dictionaryIndex, rowIndex);
        case DuckDBType.UnsignedSmallInt:
            return AppendValueInternal((ushort)dictionaryIndex, rowIndex);
        case DuckDBType.UnsignedInteger:
            return AppendValueInternal(dictionaryIndex, rowIndex);
        default:
            throw new InvalidOperationException($"Failed to write Enum column because the internal enum type must be utinyint, usmallint, or uinteger.");
    }
}

/// <summary>
/// Writes a CLR enum value into the vector. The enum's underlying numeric value is used
/// directly as the zero-based index into the column's enum dictionary.
/// </summary>
/// <typeparam name="TEnum">Enum type of the value being written.</typeparam>
/// <param name="value">Enum value whose numeric value must be below the dictionary size.</param>
/// <param name="rowIndex">Row of the vector to write to.</param>
/// <returns>The result of the underlying typed append.</returns>
/// <exception cref="InvalidOperationException">
/// The numeric value is not a valid dictionary index, or the internal enum type is not one of the supported unsigned types.
/// </exception>
internal override bool AppendEnum<TEnum>(TEnum value, int rowIndex)
{
    ulong enumValue = ConvertEnumValueToUInt64(value);
    if (enumValue >= enumDictionarySize)
    {
        // enumDictionarySize is unsigned: "size - 1" would wrap around to 4294967295 for an
        // empty dictionary, so that case gets its own wording instead of a bogus range.
        string reason = enumDictionarySize == 0
            ? "the enum dictionary is empty"
            : $"it is outside the range (0-{enumDictionarySize - 1})";

        throw new InvalidOperationException($"Failed to write Enum column because the value \"{value}\" is not valid: {reason}.");
    }

    // The following casts to byte, ushort and uint are safe because we ensure in the constructor that the value enumDictionarySize is not too high.
    return enumType switch
    {
        DuckDBType.UnsignedTinyInt => AppendValueInternal((byte)enumValue, rowIndex),
        DuckDBType.UnsignedSmallInt => AppendValueInternal((ushort)enumValue, rowIndex),
        DuckDBType.UnsignedInteger => AppendValueInternal((uint)enumValue, rowIndex),
        _ => throw new InvalidOperationException("Failed to write Enum column because the internal enum type must be utinyint, usmallint, or uinteger."),
    };
}

/// <summary>
/// Converts an enum value to <see cref="ulong"/> based on the type code reported for it.
/// Values of signed types are cast to <see cref="ulong"/> after conversion.
/// </summary>
/// <typeparam name="TEnum">Enum type of the value.</typeparam>
/// <param name="value">Enum value to convert.</param>
/// <returns>The numeric value of <paramref name="value"/> as a <see cref="ulong"/>.</returns>
/// <exception cref="InvalidOperationException">The reported type code is not one of the eight integral type codes handled here.</exception>
private static ulong ConvertEnumValueToUInt64<TEnum>(TEnum value) where TEnum : Enum
{
    TypeCode underlyingTypeCode = Convert.GetTypeCode(value);

    switch (underlyingTypeCode)
    {
        case TypeCode.SByte:
            return (ulong)Convert.ToSByte(value);
        case TypeCode.Byte:
            return Convert.ToByte(value);
        case TypeCode.Int16:
            return (ulong)Convert.ToInt16(value);
        case TypeCode.UInt16:
            return Convert.ToUInt16(value);
        case TypeCode.Int32:
            return (ulong)Convert.ToInt32(value);
        case TypeCode.UInt32:
            return Convert.ToUInt32(value);
        case TypeCode.Int64:
            return (ulong)Convert.ToInt64(value);
        case TypeCode.UInt64:
            return Convert.ToUInt64(value);
        default:
            throw new InvalidOperationException($"Failed to convert the enum value {value} to ulong.");
    }
}
}
19 changes: 18 additions & 1 deletion DuckDB.NET.Data/Internal/Writer/ListVectorDataWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ internal override bool AppendCollection(ICollection value, int rowIndex)
IEnumerable<DateTimeOffset> items => WriteItems(items),
IEnumerable<DateTimeOffset?> items => WriteItems(items),

_ => WriteItems<object>((IEnumerable<object>)value)
_ => WriteItemsFallback(value),
};

var duckDBListEntry = new DuckDBListEntry(offset, count);
Expand All @@ -108,6 +108,23 @@ int WriteItems<T>(IEnumerable<T> items)

return 0;
}

// Fallback for collections that match none of the strongly typed cases: every element is
// handed to the item writer's generic AppendValue, one row per element, starting at `offset`.
// Always returns 0.
int WriteItemsFallback(IEnumerable items)
{
    // Fixed-size arrays must receive exactly `arraySize` elements.
    if (IsList == false && count != arraySize)
    {
        throw new InvalidOperationException($"Column has Array size of {arraySize} but the specified value has size of {count}");
    }

    var written = 0;

    foreach (var element in items)
    {
        var targetRow = (int)offset + written;
        written++;

        listItemWriter.AppendValue(element, targetRow);
    }

    return 0;
}
}

private void ResizeVector(int rowIndex, ulong count)
Expand Down
4 changes: 4 additions & 0 deletions DuckDB.NET.Data/Internal/Writer/VectorDataWriterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ public void AppendValue<T>(T value, int rowIndex)
decimal val => AppendDecimal(val, rowIndex),
BigInteger val => AppendBigInteger(val, rowIndex),

Enum val => AppendEnum(val, rowIndex),

string val => AppendString(val, rowIndex),
Guid val => AppendGuid(val, rowIndex),
DateTime val => AppendDateTime(val, rowIndex),
Expand Down Expand Up @@ -96,6 +98,8 @@ public void AppendValue<T>(T value, int rowIndex)

/// <summary>
/// Default handler for <see cref="BigInteger"/> values; delegates to <c>ThrowException</c>.
/// Writers that can store such values override this method.
/// </summary>
internal virtual bool AppendBigInteger(BigInteger value, int rowIndex)
{
    return ThrowException<BigInteger>();
}

/// <summary>
/// Default handler for enum values; delegates to <c>ThrowException</c>.
/// Writers that can store enum values override this method.
/// </summary>
internal virtual bool AppendEnum<TEnum>(TEnum value, int rowIndex) where TEnum : Enum
{
    return ThrowException<TEnum>();
}

/// <summary>
/// Default handler for collection values; delegates to <c>ThrowException</c>.
/// Writers that can store collections override this method.
/// </summary>
internal virtual bool AppendCollection(ICollection value, int rowIndex)
{
    return ThrowException<bool>();
}

private bool ThrowException<T>()
Expand Down
2 changes: 1 addition & 1 deletion DuckDB.NET.Data/Internal/Writer/VectorDataWriterFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public static unsafe VectorDataWriterBase CreateWriter(IntPtr vector, DuckDBLogi
DuckDBType.Blob => new StringVectorDataWriter(vector, dataPointer, columnType),
DuckDBType.Varchar => new StringVectorDataWriter(vector, dataPointer, columnType),
DuckDBType.Bit => throw new NotImplementedException($"Writing {columnType} to data chunk is not yet supported"),
DuckDBType.Enum => throw new NotImplementedException($"Writing {columnType} to data chunk is not yet supported"),
DuckDBType.Enum => new EnumVectorDataWriter(vector, dataPointer, logicalType, columnType),
DuckDBType.Struct => throw new NotImplementedException($"Writing {columnType} to data chunk is not yet supported"),
DuckDBType.Decimal => new DecimalVectorDataWriter(vector, dataPointer, logicalType, columnType),
DuckDBType.TimestampS => new DateTimeVectorDataWriter(vector, dataPointer, columnType),
Expand Down
17 changes: 17 additions & 0 deletions DuckDB.NET.Test/DuckDBManagedAppenderListTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,16 @@ public void ArrayValuesInt()
ListValuesInternal("Integer", faker => faker.Random.Int(), 5);
}

// Verifies that lists of enum values can be appended, both as string labels and as CLR enum values.
[Fact]
public void ListValuesEnum()
{
// Register an enum type with three labels that the list columns below will use.
Command.CommandText = "CREATE TYPE test_enum AS ENUM('test1','test2','test3');";
Command.ExecuteNonQuery();

// First pass: list items are labels (or null). Second pass: list items are nullable TestEnum values (or null).
ListValuesInternal("test_enum", faker => faker.Random.CollectionItem([null, "test1", "test2", "test3"]));
ListValuesInternal("test_enum", faker => faker.Random.CollectionItem<TestEnum?>([null, TestEnum.Test1, TestEnum.Test2, TestEnum.Test3]));
}

private void ListValuesInternal<T>(string typeName, Func<Faker, T> generator, int? length = null)
{
var rows = 2000;
Expand Down Expand Up @@ -268,4 +278,11 @@ private void ListValuesInternal<T>(string typeName, Func<Faker, T> generator, in
.Should().Throw<InvalidOperationException>().Where(exception => exception.Message.Contains(length.ToString()));
}
}

// CLR counterpart of the 'test_enum' type created in ListValuesEnum.
// Numeric values are the zero-based positions of 'test1', 'test2', 'test3' in that type's label list.
private enum TestEnum
{
Test1 = 0,
Test2 = 1,
Test3 = 2,
}
}
120 changes: 120 additions & 0 deletions DuckDB.NET.Test/DuckDBManagedAppenderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Numerics;
using Bogus;
using Xunit;
using System.Text;

namespace DuckDB.NET.Test;

Expand Down Expand Up @@ -275,6 +276,50 @@ public void TemporalValues()
result.Select(tuple => tuple.Item8).Should().BeEquivalentTo(dates.Select(TimeOnly.FromDateTime));
}

// Round-trips enum values through the appender: one row is written with a mix of nulls,
// string labels and CLR enum values, then read back and compared.
[Fact]
public void EnumValues()
{
// Three enum types with 3, 1000 and 100000 labels ("test1".."testN").
// NOTE(review): sizes presumably chosen so DuckDB uses a different internal integer width for each - confirm.
Command.CommandText = GetCreateEnumTypeSql("test_enum1", "test", 3);
Command.ExecuteNonQuery();

Command.CommandText = GetCreateEnumTypeSql("test_enum2", "test", 1000);
Command.ExecuteNonQuery();

Command.CommandText = GetCreateEnumTypeSql("test_enum3", "test", 100000);
Command.ExecuteNonQuery();

// Columns a-e use test_enum1, f-g use test_enum2, h-i use test_enum3.
Command.CommandText = "CREATE TABLE managedAppenderEnum(a test_enum1, b test_enum1, c test_enum1, d test_enum1, e test_enum1, f test_enum2, g test_enum2, h test_enum3, i test_enum3);";
Command.ExecuteNonQuery();

// The values below are positional: the n-th appended value goes into the n-th column (a..i).
using (var appender = Connection.CreateAppender("managedAppenderEnum"))
{
appender
.CreateRow()
.AppendNullValue()
.AppendNullValue()
.AppendValue("test1")
.AppendValue(TestEnum1.Test2)
.AppendValue(TestEnum1.Test3)
.AppendValue("test327")
.AppendValue(TestEnum2.Test1000)
.AppendValue("test100000")
.AppendValue(TestEnum3.Test6699)
.EndRow();
}

// Read the row back. Several columns are deliberately read in the other representation than they
// were written in (label written -> enum read, enum written -> label read).
var queryResult = Connection.Query<(string, TestEnum1?, TestEnum1, string, TestEnum1, TestEnum2, string, string, TestEnum3)>("SELECT a, b, c, d, e, f, g, h, i FROM managedAppenderEnum").ToList();
var result = queryResult[0];
result.Item1.Should().BeNull();
result.Item2.Should().BeNull();
result.Item3.Should().Be(TestEnum1.Test1);
result.Item4.Should().Be("test2");
result.Item5.Should().Be(TestEnum1.Test3);
result.Item6.Should().Be(TestEnum2.Test327);
result.Item7.Should().Be("test1000");
result.Item8.Should().Be("test100000");
result.Item9.Should().Be(TestEnum3.Test6699);
}

[Fact]
public void IncompleteRowThrowsException()
{
Expand Down Expand Up @@ -366,6 +411,35 @@ public void ClosedAdapterThrowException()
}).Should().Throw<InvalidOperationException>();
}

// Appending a value that is not part of the column's enum dictionary must fail with
// InvalidOperationException, both for an unknown label and for an out-of-range CLR enum value.
[Fact]
public void EnumNotValidValueThrowException()
{
    // Enum type with 100 labels: "test1".."test100".
    Command.CommandText = GetCreateEnumTypeSql("enum_not_valid_value_test_enum", "test", 100);
    Command.ExecuteNonQuery();

    Command.CommandText = "CREATE TABLE managedAppenderEnumNotValidValueTest(a enum_not_valid_value_test_enum);";
    Command.ExecuteNonQuery();

    // Case 1: a label that does not exist in the enum type.
    Connection
        .Invoking(connection =>
        {
            using var appender = connection.CreateAppender("managedAppenderEnumNotValidValueTest");
            appender.CreateRow().AppendValue("test12345").EndRow();
        })
        .Should()
        .Throw<InvalidOperationException>();

    // Case 2: a CLR enum value whose numeric value lies beyond the enum dictionary.
    Connection
        .Invoking(connection =>
        {
            using var appender = connection.CreateAppender("managedAppenderEnumNotValidValueTest");
            appender.CreateRow().AppendValue(EnumNotValidValueTestEnum.NotValid).EndRow();
        })
        .Should()
        .Throw<InvalidOperationException>();
}

[Fact]
public void TableWithSchema()
{
Expand Down Expand Up @@ -504,9 +578,55 @@ public void ManagedAppenderOnTableAndColumnsWithSpecialCharacters(string schemaN
}
}

// Builds a "CREATE TYPE <enumName> AS ENUM('<prefix>1','<prefix>2',...,'<prefix>count');" statement.
// A count below 1 yields an enum with an empty label list.
private static string GetCreateEnumTypeSql(string enumName, string enumValueNamePrefix, int count)
{
    var sql = new StringBuilder();
    sql.AppendFormat(CultureInfo.InvariantCulture, "CREATE TYPE {0} AS ENUM(", enumName);

    for (var ordinal = 1; ordinal <= count; ordinal++)
    {
        // Emit the quoted label, e.g. 'test42'.
        sql.Append('\'')
           .Append(enumValueNamePrefix)
           .Append(ordinal)
           .Append('\'');

        // Separator after every label except the last one.
        if (ordinal < count)
        {
            sql.Append(',');
        }
    }

    return sql.Append(");").ToString();
}

// Joins the non-blank parts into a dot-separated name with each part wrapped in double quotes,
// e.g. ("main", "tbl") -> "main"."tbl". Null, empty and whitespace-only parts are skipped.
private static string GetQualifiedObjectName(params string[] parts)
{
    var quotedParts =
        from part in parts
        where !string.IsNullOrWhiteSpace(part)
        select '"' + part + '"';

    return string.Join('.', quotedParts);
}

// CLR counterpart of 'test_enum1' (labels 'test1'..'test3').
// Numeric values are zero-based positions in the label list, so TestN has the value N-1.
private enum TestEnum1
{
Test1 = 0,
Test2 = 1,
Test3 = 2,
}

// CLR counterpart of 'test_enum2' (labels 'test1'..'test1000'), backed by a signed 16-bit integer.
// Only the members used by the tests are declared; TestN has the value N-1.
private enum TestEnum2 : short
{
Test327 = 326,
Test1000 = 999,
}

// CLR counterpart of 'test_enum3' (labels 'test1'..'test100000'), backed by an unsigned 64-bit integer.
// Only the members used by the tests are declared; TestN has the value N-1.
private enum TestEnum3 : ulong
{
Test6699 = 6698,
Test100000 = 99999,
}

// Enum whose only member has a numeric value (12345) far beyond the 100 labels of
// 'enum_not_valid_value_test_enum'; used to check that out-of-range values are rejected.
private enum EnumNotValidValueTestEnum
{
NotValid = 12345,
}
}
Loading