Skip to content

Commit

Permalink
fix uuid
Browse files Browse the repository at this point in the history
  • Loading branch information
Huaxin Gao committed May 19, 2024
1 parent 1f23c18 commit 27716ee
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
11 changes: 11 additions & 0 deletions common/src/main/java/org/apache/comet/parquet/ColumnReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.parquet.column.page.DataPageV2;
import org.apache.parquet.column.page.DictionaryPage;
import org.apache.parquet.column.page.PageReader;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.spark.sql.types.DataType;

import org.apache.comet.CometConf;
Expand Down Expand Up @@ -169,6 +170,7 @@ public void close() {

/** Returns a decoded {@link CometDecodedVector Comet vector}. */
public CometDecodedVector loadVector() {

// Only re-use Comet vector iff:
// 1. if we're not using dictionary encoding, since with dictionary encoding, the native
// side may fallback to plain encoding and the underlying memory address for the vector
Expand Down Expand Up @@ -199,6 +201,11 @@ public CometDecodedVector loadVector() {
currentVector.close();
}

LogicalTypeAnnotation logicalTypeAnnotation =
descriptor.getPrimitiveType().getLogicalTypeAnnotation();
boolean isUuid =
logicalTypeAnnotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;

long[] addresses = Native.currentBatch(nativeHandle);

try (ArrowArray array = ArrowArray.wrap(addresses[0]);
Expand All @@ -207,6 +214,7 @@ public CometDecodedVector loadVector() {
DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary();

CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128);
cometVector.setIsUuid(isUuid);

// Update whether the current vector contains any null values. This is used in the following
// batch(s) to determine whether we can skip loading the native vector.
Expand All @@ -230,11 +238,14 @@ public CometDecodedVector loadVector() {
Dictionary arrowDictionary = dictionaryProvider.lookup(dictionaryEncoding.getId());
CometPlainVector dictionaryVector =
new CometPlainVector(arrowDictionary.getVector(), useDecimal128);
dictionaryVector.setIsUuid(isUuid);
dictionary = new CometDictionary(dictionaryVector);
}

currentVector =
new CometDictionaryVector(cometVector, dictionary, dictionaryProvider, useDecimal128);
cometVector.setIsUuid(isUuid);

return currentVector;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public abstract class CometDecodedVector extends CometVector {
private int numValues;
private int validityByteCacheIndex = -1;
private byte validityByteCache;
protected boolean isUuid;

protected CometDecodedVector(ValueVector vector, Field valueField, boolean useDecimal128) {
super(Utils.fromArrowField(valueField), useDecimal128);
Expand All @@ -47,6 +48,10 @@ protected CometDecodedVector(ValueVector vector, Field valueField, boolean useDe
this.hasNull = numNulls != 0;
}

public void setIsUuid(boolean isUuid) {
this.isUuid = isUuid;
}

@Override
public ValueVector getValueVector() {
return valueVector;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,12 @@ public UTF8String getUTF8String(int rowId) {
byte[] result = new byte[length];
Platform.copyMemory(
null, valueBufferAddress + offset, result, Platform.BYTE_ARRAY_OFFSET, length);
return UTF8String.fromBytes(result);

if (!isUuid) {
return UTF8String.fromBytes(result);
} else {
return UTF8String.fromString(convertToUuid(result).toString());
}
}
}

Expand Down

0 comments on commit 27716ee

Please sign in to comment.