Commit bb4a1d6
Merge remote-tracking branch 'upstream/main' into hash_join_build_right
viirya committed May 30, 2024
2 parents a6f05c5 + 8c532be
Showing 58 changed files with 3,765 additions and 262 deletions.
4 changes: 2 additions & 2 deletions .github/actions/setup-spark-builder/action.yaml
@@ -23,9 +23,9 @@ inputs:
     required: true
     default: '3.4'
   spark-version:
-    description: 'The Apache Spark version (e.g., 3.4.2) to build'
+    description: 'The Apache Spark version (e.g., 3.4.3) to build'
     required: true
-    default: '3.4.2'
+    default: '3.4.3'
   comet-version:
     description: 'The Comet version to use for Spark'
     required: true
113 changes: 113 additions & 0 deletions .github/workflows/pr_build.yml
@@ -76,6 +76,44 @@ jobs:
           # upload test reports only for java 17
           upload-test-reports: ${{ matrix.java_version == '17' }}

+  linux-test-with-spark4_0:
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        java_version: [17]
+        test-target: [java]
+        spark-version: ['4.0']
+        is_push_event:
+          - ${{ github.event_name == 'push' }}
+      fail-fast: false
+    name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java_version }}
+      - name: Clone Spark
+        uses: actions/checkout@v4
+        with:
+          repository: "apache/spark"
+          path: "apache-spark"
+      - name: Install Spark
+        shell: bash
+        working-directory: ./apache-spark
+        run: build/mvn install -Phive -Phadoop-cloud -DskipTests
+      - name: Java test steps
+        uses: ./.github/actions/java-test
+        with:
+          # TODO: remove -DskipTests after fixing tests
+          maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
+          # TODO: upload test reports after enabling tests
+          upload-test-reports: false
+
   linux-test-with-old-spark:
     strategy:
       matrix:
@@ -169,6 +207,81 @@ jobs:
         with:
           maven_opts: -Pspark-${{ matrix.spark-version }},scala-${{ matrix.scala-version }}

+  macos-test-with-spark4_0:
+    strategy:
+      matrix:
+        os: [macos-13]
+        java_version: [17]
+        test-target: [java]
+        spark-version: ['4.0']
+      fail-fast: false
+    if: github.event_name == 'push'
+    name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-macos-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java_version }}
+      - name: Clone Spark
+        uses: actions/checkout@v4
+        with:
+          repository: "apache/spark"
+          path: "apache-spark"
+      - name: Install Spark
+        shell: bash
+        working-directory: ./apache-spark
+        run: build/mvn install -Phive -Phadoop-cloud -DskipTests
+      - name: Java test steps
+        uses: ./.github/actions/java-test
+        with:
+          # TODO: remove -DskipTests after fixing tests
+          maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
+          # TODO: upload test reports after enabling tests
+          upload-test-reports: false
+
+  macos-aarch64-test-with-spark4_0:
+    strategy:
+      matrix:
+        java_version: [17]
+        test-target: [java]
+        spark-version: ['4.0']
+        is_push_event:
+          - ${{ github.event_name == 'push' }}
+        exclude: # exclude java 11 for pull_request event
+          - java_version: 11
+            is_push_event: false
+      fail-fast: false
+    name: macos-14(Silicon)/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}/${{ matrix.test-target }}
+    runs-on: macos-14
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-macos-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java_version }}
+          jdk-architecture: aarch64
+          protoc-architecture: aarch_64
+      - name: Clone Spark
+        uses: actions/checkout@v4
+        with:
+          repository: "apache/spark"
+          path: "apache-spark"
+      - name: Install Spark
+        shell: bash
+        working-directory: ./apache-spark
+        run: build/mvn install -Phive -Phadoop-cloud -DskipTests
+      - name: Java test steps
+        uses: ./.github/actions/java-test
+        with:
+          # TODO: remove -DskipTests after fixing tests
+          maven_opts: "-Pspark-${{ matrix.spark-version }} -DskipTests"
+          # TODO: upload test reports after enabling tests
+          upload-test-reports: false
+
   macos-aarch64-test-with-old-spark:
     strategy:
       matrix:
2 changes: 1 addition & 1 deletion .github/workflows/spark_sql_test.yml
@@ -45,7 +45,7 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         java-version: [11]
-        spark-version: [{short: '3.4', full: '3.4.2'}]
+        spark-version: [{short: '3.4', full: '3.4.3'}]
         module:
           - {name: "catalyst", args1: "catalyst/test", args2: ""}
           - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
@@ -234,7 +234,11 @@ public CometDecodedVector loadVector() {
     Dictionary arrowDictionary = importer.getProvider().lookup(dictionaryEncoding.getId());
     CometPlainVector dictionaryVector =
         new CometPlainVector(arrowDictionary.getVector(), useDecimal128, isUuid);
-    dictionary = new CometDictionary(dictionaryVector);
+    if (dictionary != null) {
+      dictionary.setDictionaryVector(dictionaryVector);
+    } else {
+      dictionary = new CometDictionary(dictionaryVector);
+    }

     currentVector =
         new CometDictionaryVector(
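The loadVector() change above lets a dictionary built for an earlier batch be reused when the same column is reloaded: rather than wrapping each freshly imported Arrow vector in a brand-new CometDictionary, the existing wrapper is retargeted. A minimal sketch of the pattern, where the batch loop and the importDictionary() helper are hypothetical stand-ins rather than Comet API:

    // Sketch only: `batches` and `importDictionary` are hypothetical.
    CometDictionary dictionary = null;
    for (Object batch : batches) {
      CometPlainVector dictionaryVector = importDictionary(batch);
      if (dictionary == null) {
        // First load: build the wrapper (values are copied only for DECIMAL).
        dictionary = new CometDictionary(dictionaryVector);
      } else {
        // Later loads: point the wrapper at the new Arrow allocation;
        // setDictionaryVector throws if the value count differs.
        dictionary.setDictionaryVector(dictionaryVector);
      }
    }
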
111 changes: 30 additions & 81 deletions common/src/main/java/org/apache/comet/vector/CometDictionary.java
@@ -26,65 +26,72 @@
 public class CometDictionary implements AutoCloseable {
   private static final int DECIMAL_BYTE_WIDTH = 16;

-  private final CometPlainVector values;
+  private CometPlainVector values;
   private final int numValues;

-  /** Decoded dictionary values. Only one of the following is set. */
-  private byte[] bytes;
-
-  private short[] shorts;
-  private int[] ints;
-  private long[] longs;
-  private float[] floats;
-  private double[] doubles;
-  private boolean[] booleans;
+  /** Decoded dictionary values. We only need to copy values for decimal type. */
   private ByteArrayWrapper[] binaries;
-  private UTF8String[] strings;

   public CometDictionary(CometPlainVector values) {
     this.values = values;
     this.numValues = values.numValues();
     initialize();
   }

+  public void setDictionaryVector(CometPlainVector values) {
+    this.values = values;
+    if (values.numValues() != numValues) {
+      throw new IllegalArgumentException("Mismatched dictionary size");
+    }
+  }
+
   public ValueVector getValueVector() {
     return values.getValueVector();
   }

   public boolean decodeToBoolean(int index) {
-    return booleans[index];
+    return values.getBoolean(index);
   }

   public byte decodeToByte(int index) {
-    return bytes[index];
+    return values.getByte(index);
   }

   public short decodeToShort(int index) {
-    return shorts[index];
+    return values.getShort(index);
   }

   public int decodeToInt(int index) {
-    return ints[index];
+    return values.getInt(index);
   }

   public long decodeToLong(int index) {
-    return longs[index];
+    return values.getLong(index);
   }

   public float decodeToFloat(int index) {
-    return floats[index];
+    return values.getFloat(index);
   }

   public double decodeToDouble(int index) {
-    return doubles[index];
+    return values.getDouble(index);
   }

   public byte[] decodeToBinary(int index) {
-    return binaries[index].bytes;
+    switch (values.getValueVector().getMinorType()) {
+      case VARBINARY:
+      case FIXEDSIZEBINARY:
+        return values.getBinary(index);
+      case DECIMAL:
+        return binaries[index].bytes;
+      default:
+        throw new IllegalArgumentException(
+            "Invalid Arrow minor type: " + values.getValueVector().getMinorType());
+    }
   }

   public UTF8String decodeToUTF8String(int index) {
-    return strings[index];
+    return values.getUTF8String(index);
   }

   @Override
@@ -94,65 +101,10 @@ public void close() {

   private void initialize() {
     switch (values.getValueVector().getMinorType()) {
-      case BIT:
-        booleans = new boolean[numValues];
-        for (int i = 0; i < numValues; i++) {
-          booleans[i] = values.getBoolean(i);
-        }
-        break;
-      case TINYINT:
-        bytes = new byte[numValues];
-        for (int i = 0; i < numValues; i++) {
-          bytes[i] = values.getByte(i);
-        }
-        break;
-      case SMALLINT:
-        shorts = new short[numValues];
-        for (int i = 0; i < numValues; i++) {
-          shorts[i] = values.getShort(i);
-        }
-        break;
-      case INT:
-      case DATEDAY:
-        ints = new int[numValues];
-        for (int i = 0; i < numValues; i++) {
-          ints[i] = values.getInt(i);
-        }
-        break;
-      case BIGINT:
-      case TIMESTAMPMICRO:
-      case TIMESTAMPMICROTZ:
-        longs = new long[numValues];
-        for (int i = 0; i < numValues; i++) {
-          longs[i] = values.getLong(i);
-        }
-        break;
-      case FLOAT4:
-        floats = new float[numValues];
-        for (int i = 0; i < numValues; i++) {
-          floats[i] = values.getFloat(i);
-        }
-        break;
-      case FLOAT8:
-        doubles = new double[numValues];
-        for (int i = 0; i < numValues; i++) {
-          doubles[i] = values.getDouble(i);
-        }
-        break;
-      case VARBINARY:
-      case FIXEDSIZEBINARY:
-        binaries = new ByteArrayWrapper[numValues];
-        for (int i = 0; i < numValues; i++) {
-          binaries[i] = new ByteArrayWrapper(values.getBinary(i));
-        }
-        break;
-      case VARCHAR:
-        strings = new UTF8String[numValues];
-        for (int i = 0; i < numValues; i++) {
-          strings[i] = values.getUTF8String(i);
-        }
-        break;
       case DECIMAL:
+        // We only need to copy values for decimal type as random access
+        // to the dictionary is not efficient for decimal (it needs to copy
+        // the value to a new byte array every time).
         binaries = new ByteArrayWrapper[numValues];
         for (int i = 0; i < numValues; i++) {
           // Need copying here since we re-use byte array for decimal

@@ -161,9 +113,6 @@ private void initialize() {
           binaries[i] = new ByteArrayWrapper(bytes);
         }
         break;
-      default:
-        throw new IllegalArgumentException(
-            "Invalid Arrow minor type: " + values.getValueVector().getMinorType());
     }
   }
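
Taken together, the CometDictionary changes drop the eagerly decoded per-type arrays: every decode call now reads straight from the backing CometPlainVector, and only DECIMAL values are still copied up front, since each Arrow read of a decimal would otherwise allocate a fresh byte array. A minimal usage sketch, assuming plainVector and nextPlainVector are hypothetical CometPlainVectors wrapping equally sized BIGINT dictionaries:

    // Minimal sketch; plainVector and nextPlainVector are assumed inputs.
    CometDictionary dict = new CometDictionary(plainVector);
    long first = dict.decodeToLong(0);   // delegates to plainVector.getLong(0)

    // Next batch: retarget the wrapper instead of rebuilding it. The
    // replacement vector must hold the same number of values.
    dict.setDictionaryVector(nextPlainVector);
    long again = dict.decodeToLong(0);   // now reads from nextPlainVector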

(Diffs for the remaining 53 changed files are not shown.)