diff --git a/docs/source/status.rst b/docs/source/status.rst index e52e4e4cd49bc..e860aceb76e15 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -62,7 +62,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Large Binary | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +| Large Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts index 9ec76fdd009f3..cdb4171162f63 100644 --- a/js/src/Arrow.dom.ts +++ b/js/src/Arrow.dom.ts @@ -48,7 +48,7 @@ export { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, Utf8, LargeUtf8, - Binary, + Binary, LargeBinary, FixedSizeBinary, Date_, DateDay, DateMillisecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, @@ -78,7 +78,7 @@ export { } from './Arrow.js'; export { - BinaryBuilder, + BinaryBuilder, LargeBinaryBuilder, BoolBuilder, DateBuilder, DateDayBuilder, DateMillisecondBuilder, DecimalBuilder, diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index b7e5f63a6ab5a..6251a9e77717b 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -37,7 +37,7 @@ export { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, Utf8, LargeUtf8, - Binary, + Binary, LargeBinary, FixedSizeBinary, Date_, DateDay, DateMillisecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, @@ -80,6 +80,7 @@ export { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, Dur export { Utf8Builder } from './builder/utf8.js'; export { LargeUtf8Builder } from './builder/largeutf8.js'; export { BinaryBuilder } from './builder/binary.js'; +export { LargeBinaryBuilder } from './builder/largebinary.js'; export { ListBuilder } from './builder/list.js'; export { FixedSizeListBuilder } from './builder/fixedsizelist.js'; export { MapBuilder } from './builder/map.js'; diff --git a/js/src/builder.ts b/js/src/builder.ts index 1a4c52f871bbf..a4e2d4d89325c 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -22,7 +22,7 @@ import { DataType, strideForType, Float, Int, Decimal, FixedSizeBinary, Date_, Time, Timestamp, Interval, Duration, - Utf8, LargeUtf8, Binary, List, Map_, + Utf8, LargeUtf8, Binary, LargeBinary, List, Map_, } from './type.js'; import { createIsValidFunction } from './builder/valid.js'; import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js'; @@ -285,7 +285,7 @@ export abstract class Builder { if (typeIds = _typeIds?.flush(length)) { // Unions, DenseUnions valueOffsets = _offsets?.flush(length); - } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8, LargeUtf8), and Lists + } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, LargeBinary, Utf8, LargeUtf8), and Lists data = _values?.flush(_offsets.last()); } else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, Duration and Interval) data = _values?.flush(length); @@ -352,7 +352,7 @@ export abstract class FixedWidthBuilder extends Builder { +export abstract class VariableWidthBuilder extends Builder { protected _pendingLength = 0; protected _offsets: OffsetsBufferBuilder; protected _pending: Map | undefined; diff --git a/js/src/builder/largebinary.ts b/js/src/builder/largebinary.ts new file mode 100644 index 0000000000000..59aa7144d20a1 --- /dev/null +++ b/js/src/builder/largebinary.ts @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { LargeBinary } from '../type.js'; +import { toUint8Array } from '../util/buffer.js'; +import { BufferBuilder } from './buffer.js'; +import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; + +/** @ignore */ +export class LargeBinaryBuilder extends VariableWidthBuilder { + constructor(opts: BuilderOptions) { + super(opts); + this._values = new BufferBuilder(new Uint8Array(0)); + } + public get byteLength(): number { + let size = this._pendingLength + (this.length * 4); + this._offsets && (size += this._offsets.byteLength); + this._values && (size += this._values.byteLength); + this._nulls && (size += this._nulls.byteLength); + return size; + } + public setValue(index: number, value: Uint8Array) { + return super.setValue(index, toUint8Array(value)); + } + protected _flushPending(pending: Map, pendingLength: number) { + const offsets = this._offsets; + const data = this._values.reserve(pendingLength).buffer; + let offset = 0; + for (const [index, value] of pending) { + if (value === undefined) { + offsets.set(index, BigInt(0)); + } else { + const length = value.length; + data.set(value, offset); + offsets.set(index, BigInt(length)); + offset += length; + } + } + } +} diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts index fddfeaf8e7b17..51890100095c1 100644 --- a/js/src/builder/largeutf8.ts +++ b/js/src/builder/largeutf8.ts @@ -19,6 +19,7 @@ import { LargeUtf8 } from '../type.js'; import { encodeUtf8 } from '../util/utf8.js'; import { BufferBuilder } from './buffer.js'; import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; +import { LargeBinaryBuilder } from './largebinary.js'; /** @ignore */ export class LargeUtf8Builder extends VariableWidthBuilder { @@ -36,24 +37,9 @@ export class LargeUtf8Builder extends VariableWidthBuilder, pendingLength: number): void { } - protected _flushPending(pending: Map, pendingLength: number) { - const offsets = this._offsets; - const data = this._values.reserve(pendingLength).buffer; - let offset = 0; - for (const [index, value] of pending) { - if (value === undefined) { - offsets.set(index, BigInt(0)); - } else { - const length = value.length; - data.set(value, offset); - offsets.set(index, BigInt(length)); - offset += length; - } - } - } + protected _flushPending(pending: Map, pendingLength: number): void { } } -// (LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending; +(LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending; diff --git a/js/src/data.ts b/js/src/data.ts index 145ee9d049cb4..6f8792508858b 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -17,7 +17,7 @@ import { Vector } from './vector.js'; import { BufferType, Type, UnionMode } from './enum.js'; -import { DataType, LargeUtf8, strideForType } from './type.js'; +import { DataType, strideForType } from './type.js'; import { popcnt_bit_range, truncateBitmap } from './util/bit.js'; // When slicing, we do not know the null count of the sliced range without @@ -253,7 +253,7 @@ export class Data { import { Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Int, Date_, @@ -324,6 +324,14 @@ class MakeDataVisitor extends Visitor { const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); } + public visitLargeBinary(props: LargeBinaryDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const data = toUint8Array(props['data']); + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toBigInt64Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); + } public visitFixedSizeBinary(props: FixedSizeBinaryDataProps) { const { ['type']: type, ['offset']: offset = 0 } = props; const nullBitmap = toUint8Array(props['nullBitmap']); @@ -444,6 +452,7 @@ interface IntervalDataProps extends DataProps_ { data?: D interface DurationDataProps extends DataProps_ { data?: DataBuffer } interface FixedSizeBinaryDataProps extends DataProps_ { data?: DataBuffer } interface BinaryDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } +interface LargeBinaryDataProps extends DataProps_ { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer } interface Utf8DataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } interface LargeUtf8DataProps extends DataProps_ { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer } interface ListDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; child: Data } @@ -468,6 +477,7 @@ export type DataProps = ( T extends Duration /* */ ? DurationDataProps : T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps : T extends Binary /* */ ? BinaryDataProps : + T extends LargeBinary /* */ ? LargeBinaryDataProps : T extends Utf8 /* */ ? Utf8DataProps : T extends LargeUtf8 /* */ ? LargeUtf8DataProps : T extends List /* */ ? ListDataProps : @@ -495,6 +505,7 @@ export function makeData(props: IntervalDataProps): Data< export function makeData(props: DurationDataProps): Data; export function makeData(props: FixedSizeBinaryDataProps): Data; export function makeData(props: BinaryDataProps): Data; +export function makeData(props: LargeBinaryDataProps): Data; export function makeData(props: Utf8DataProps): Data; export function makeData(props: LargeUtf8DataProps): Data; export function makeData(props: ListDataProps): Data; diff --git a/js/src/enum.ts b/js/src/enum.ts index 764ea64e63338..0eecc0c68b525 100644 --- a/js/src/enum.ts +++ b/js/src/enum.ts @@ -173,7 +173,8 @@ export enum Type { FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */ FixedSizeList = 16, /** Fixed-size list. Each value occupies the same number of bytes */ Map = 17, /** Map of named logical types */ - Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */ + Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds */ + LargeBinary = 19, /** Large variable-length bytes (no guarantee of UTF8-ness) */ LargeUtf8 = 20, /** Large variable-length string as List */ Dictionary = -1, /** Dictionary aka Category type */ diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts index 707d01bb14cca..c4119a8bd287a 100644 --- a/js/src/interfaces.ts +++ b/js/src/interfaces.ts @@ -35,6 +35,7 @@ import type { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder import type { Utf8Builder } from './builder/utf8.js'; import type { LargeUtf8Builder } from './builder/largeutf8.js'; import type { BinaryBuilder } from './builder/binary.js'; +import type { LargeBinaryBuilder } from './builder/largebinary.js'; import type { ListBuilder } from './builder/list.js'; import type { FixedSizeListBuilder } from './builder/fixedsizelist.js'; import type { MapBuilder } from './builder/map.js'; @@ -210,6 +211,7 @@ export type TypeToDataType = { [Type.Utf8]: type.Utf8; [Type.LargeUtf8]: type.LargeUtf8; [Type.Binary]: type.Binary; + [Type.LargeBinary]: type.LargeBinary; [Type.FixedSizeBinary]: type.FixedSizeBinary; [Type.Date]: type.Date_; [Type.DateDay]: type.DateDay; @@ -264,6 +266,7 @@ type TypeToBuilder = { [Type.Utf8]: Utf8Builder; [Type.LargeUtf8]: LargeUtf8Builder; [Type.Binary]: BinaryBuilder; + [Type.LargeBinary]: LargeBinaryBuilder; [Type.FixedSizeBinary]: FixedSizeBinaryBuilder; [Type.Date]: DateBuilder; [Type.DateDay]: DateDayBuilder; @@ -318,6 +321,7 @@ type DataTypeToBuilder = { [Type.Utf8]: T extends type.Utf8 ? Utf8Builder : never; [Type.LargeUtf8]: T extends type.LargeUtf8 ? LargeUtf8Builder : never; [Type.Binary]: T extends type.Binary ? BinaryBuilder : never; + [Type.LargeBinary]: T extends type.LargeBinary ? LargeBinaryBuilder : never; [Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? FixedSizeBinaryBuilder : never; [Type.Date]: T extends type.Date_ ? DateBuilder : never; [Type.DateDay]: T extends type.DateDay ? DateDayBuilder : never; diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts index b669c0c612f8a..8dc81ced3ffd1 100644 --- a/js/src/ipc/metadata/json.ts +++ b/js/src/ipc/metadata/json.ts @@ -20,7 +20,7 @@ import { Schema, Field } from '../../schema.js'; import { DataType, Dictionary, TimeBitWidth, - Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, + Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration, } from '../../type.js'; @@ -149,6 +149,7 @@ function typeFromJSON(f: any, children?: Field[]): DataType { case 'NONE': return new Null(); case 'null': return new Null(); case 'binary': return new Binary(); + case 'largebinary': return new LargeBinary(); case 'utf8': return new Utf8(); case 'largeutf8': return new LargeUtf8(); case 'bool': return new Bool(); diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts index cf05bff54cfba..552c4d846e863 100644 --- a/js/src/ipc/metadata/message.ts +++ b/js/src/ipc/metadata/message.ts @@ -56,7 +56,7 @@ import ByteBuffer = flatbuffers.ByteBuffer; import { DataType, Dictionary, TimeBitWidth, - Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, + Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration, } from '../../type.js'; @@ -432,6 +432,7 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType { case Type['NONE']: return new Null(); case Type['Null']: return new Null(); case Type['Binary']: return new Binary(); + case Type['LargeBinary']: return new LargeBinary(); case Type['Utf8']: return new Utf8(); case Type['LargeUtf8']: return new LargeUtf8(); case Type['Bool']: return new Bool(); diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts index 54b4b0249e420..565b0825bd9be 100644 --- a/js/src/ipc/writer.ts +++ b/js/src/ipc/writer.ts @@ -391,7 +391,7 @@ export class RecordBatchJSONWriter extends RecordBatchW protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0)); this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `); - this._write(`${dictionaryBatchToJSON(dictionary, id, isDelta)}`); + this._write(dictionaryBatchToJSON(dictionary, id, isDelta)); this._dictionaryBlocks.push(new FileBlock(0, 0, 0)); return this; } @@ -401,7 +401,6 @@ export class RecordBatchJSONWriter extends RecordBatchW return this; } public close() { - if (this._dictionaries.length > 0) { this._write(`,\n "dictionaries": [\n`); for (const batch of this._dictionaries) { @@ -413,7 +412,7 @@ export class RecordBatchJSONWriter extends RecordBatchW if (this._recordBatches.length > 0) { for (let i = -1, n = this._recordBatches.length; ++i < n;) { this._write(i === 0 ? `,\n "batches": [\n ` : `,\n `); - this._write(`${recordBatchToJSON(this._recordBatches[i])}`); + this._write(recordBatchToJSON(this._recordBatches[i])); this._recordBatchBlocks.push(new FileBlock(0, 0, 0)); } this._write(`\n ]`); diff --git a/js/src/type.ts b/js/src/type.ts index 6223d0316f17a..dea5301aed355 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -58,6 +58,7 @@ export abstract class DataType { })(Binary.prototype); } +/** @ignore */ +export interface LargeBinary extends DataType { TArray: Uint8Array; TOffsetArray: BigInt64Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor; OffsetArrayType: BigIntArrayConstructor } +/** @ignore */ +export class LargeBinary extends DataType { + constructor() { + super(); + } + public get typeId() { return Type.LargeBinary as Type.LargeBinary; } + public toString() { return `LargeBinary`; } + protected static [Symbol.toStringTag] = ((proto: LargeBinary) => { + (proto).ArrayType = Uint8Array; + (proto).OffsetArrayType = BigInt64Array; + return proto[Symbol.toStringTag] = 'LargeBinary'; + })(LargeBinary.prototype); +} + /** @ignore */ export interface Utf8 extends DataType { TArray: Uint8Array; TOffsetArray: Int32Array; TValue: string; ArrayType: TypedArrayConstructor; OffsetArrayType: TypedArrayConstructor } /** @ignore */ @@ -601,7 +618,6 @@ export class FixedSizeBinary extends DataType { protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => { (proto).byteWidth = null; (proto).ArrayType = Uint8Array; - (proto).OffsetArrayType = Int32Array; return proto[Symbol.toStringTag] = 'FixedSizeBinary'; })(FixedSizeBinary.prototype); } diff --git a/js/src/visitor.ts b/js/src/visitor.ts index 5b3cc4d3d0593..2fb5e7e14bc22 100644 --- a/js/src/visitor.ts +++ b/js/src/visitor.ts @@ -38,6 +38,7 @@ export abstract class Visitor { public visitUtf8(_node: any, ..._args: any[]): any { return null; } public visitLargeUtf8(_node: any, ..._args: any[]): any { return null; } public visitBinary(_node: any, ..._args: any[]): any { return null; } + public visitLargeBinary(_node: any, ..._args: any[]): any { return null; } public visitFixedSizeBinary(_node: any, ..._args: any[]): any { return null; } public visitDate(_node: any, ..._args: any[]): any { return null; } public visitTimestamp(_node: any, ..._args: any[]): any { return null; } @@ -48,7 +49,7 @@ export abstract class Visitor { public visitUnion(_node: any, ..._args: any[]): any { return null; } public visitDictionary(_node: any, ..._args: any[]): any { return null; } public visitInterval(_node: any, ..._args: any[]): any { return null; } - public visitDuration(_node: any, ... _args: any[]): any { return null; } + public visitDuration(_node: any, ..._args: any[]): any { return null; } public visitFixedSizeList(_node: any, ..._args: any[]): any { return null; } public visitMap(_node: any, ..._args: any[]): any { return null; } } @@ -92,6 +93,7 @@ function getVisitFnByTypeId(visitor: Visitor, dtype: Type, throwIfNotFound = tru case Type.Utf8: fn = visitor.visitUtf8; break; case Type.LargeUtf8: fn = visitor.visitLargeUtf8; break; case Type.Binary: fn = visitor.visitBinary; break; + case Type.LargeBinary: fn = visitor.visitLargeBinary; break; case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break; case Type.Date: fn = visitor.visitDate; break; case Type.DateDay: fn = visitor.visitDateDay || visitor.visitDate; break; @@ -153,6 +155,7 @@ function inferDType(type: T): Type { // @ts-ignore return Type.Float; case Type.Binary: return Type.Binary; + case Type.LargeBinary: return Type.LargeBinary; case Type.Utf8: return Type.Utf8; case Type.LargeUtf8: return Type.LargeUtf8; case Type.Bool: return Type.Bool; @@ -234,6 +237,7 @@ export interface Visitor { visitUtf8(node: any, ...args: any[]): any; visitLargeUtf8(node: any, ...args: any[]): any; visitBinary(node: any, ...args: any[]): any; + visitLargeBinary(node: any, ...args: any[]): any; visitFixedSizeBinary(node: any, ...args: any[]): any; visitDate(node: any, ...args: any[]): any; visitDateDay?(node: any, ...args: any[]): any; diff --git a/js/src/visitor/builderctor.ts b/js/src/visitor/builderctor.ts index 83374712b2642..5b3758c4e0cbc 100644 --- a/js/src/visitor/builderctor.ts +++ b/js/src/visitor/builderctor.ts @@ -22,6 +22,7 @@ import { DataType } from '../type.js'; import { Visitor } from '../visitor.js'; import { BuilderCtor } from '../interfaces.js'; import { BinaryBuilder } from '../builder/binary.js'; +import { LargeBinaryBuilder } from '../builder/largebinary.js'; import { BoolBuilder } from '../builder/bool.js'; import { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from '../builder/date.js'; import { DecimalBuilder } from '../builder/decimal.js'; @@ -70,6 +71,7 @@ export class GetBuilderCtor extends Visitor { public visitUtf8() { return Utf8Builder; } public visitLargeUtf8() { return LargeUtf8Builder; } public visitBinary() { return BinaryBuilder; } + public visitLargeBinary() { return LargeBinaryBuilder; } public visitFixedSizeBinary() { return FixedSizeBinaryBuilder; } public visitDate() { return DateBuilder; } public visitDateDay() { return DateDayBuilder; } diff --git a/js/src/visitor/bytelength.ts b/js/src/visitor/bytelength.ts index c3bfadd50e155..43399b2571fe2 100644 --- a/js/src/visitor/bytelength.ts +++ b/js/src/visitor/bytelength.ts @@ -26,9 +26,10 @@ import { Type, TimeUnit, UnionMode } from '../enum.js'; import { DataType, Dictionary, Float, Int, Date_, Interval, Time, Timestamp, Duration, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, DenseUnion, SparseUnion, } from '../type.js'; +import { bigIntToNumber } from '../util/bigint.js'; /** @ignore */ const sum = (x: number, y: number) => x + y; @@ -39,6 +40,7 @@ export interface GetByteLengthVisitor extends Visitor { getVisitFn(node: Data | T): (data: Data, index: number) => number; getVisitFn(node: T): (data: Data>, index: number) => number; visitBinary(data: Data, index: number): number; + visitLargeBinary(data: Data, index: number): number; visitUtf8(data: Data, index: number): number; visitLargeUtf8(data: Data, index: number): number; visitList(data: Data, index: number): number; @@ -95,22 +97,15 @@ export class GetByteLengthVisitor extends Visitor { } /** @ignore */ -const getUtf8ByteLength = ({ valueOffsets }: Data, index: number): number => { +const getBinaryByteLength = ({ valueOffsets }: Data, index: number): number => { // 4 + 4 for the indices, `end - start` for the data bytes - return 8 + (valueOffsets[index + 1] - valueOffsets[index]); -}; - -/** @ignore */ -const getBinaryByteLength = ({ valueOffsets }: Data, index: number): number => { - // 4 + 4 for the indices, `end - start` for the data bytes - return 8 + (valueOffsets[index + 1] - valueOffsets[index]); + return 8 + bigIntToNumber(valueOffsets[index + 1]) - bigIntToNumber(valueOffsets[index]); }; /** @ignore */ const getListByteLength = ({ valueOffsets, stride, children }: Data, index: number): number => { const child: Data = children[0]; - const { [index * stride]: start } = valueOffsets; - const { [index * stride + 1]: end } = valueOffsets; + const { [index * stride]: start, [index * stride + 1]: end } = valueOffsets; const visit = instance.getVisitFn(child.type); const slice = child.slice(start, end - start); let size = 8; // 4 + 4 for the indices @@ -155,8 +150,10 @@ const getSparseUnionByteLength = ({ children }: Data, return 4 + instance.visitMany(children, children.map(() => index)).reduce(sum, 0); }; -GetByteLengthVisitor.prototype.visitUtf8 = getUtf8ByteLength; +GetByteLengthVisitor.prototype.visitUtf8 = getBinaryByteLength; +GetByteLengthVisitor.prototype.visitLargeUtf8 = getBinaryByteLength; GetByteLengthVisitor.prototype.visitBinary = getBinaryByteLength; +GetByteLengthVisitor.prototype.visitLargeBinary = getBinaryByteLength; GetByteLengthVisitor.prototype.visitList = getListByteLength; GetByteLengthVisitor.prototype.visitFixedSizeList = getFixedSizeListByteLength; GetByteLengthVisitor.prototype.visitUnion = getUnionByteLength; diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 112d2f2983e53..3ab3bcb68c386 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -28,7 +28,7 @@ import { uint16ToFloat64 } from '../util/math.js'; import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; import { DataType, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -36,7 +36,7 @@ import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond, - Union, DenseUnion, SparseUnion, LargeUtf8, + Union, DenseUnion, SparseUnion, } from '../type.js'; /** @ignore */ @@ -63,6 +63,7 @@ export interface GetVisitor extends Visitor { visitUtf8(data: Data, index: number): T['TValue'] | null; visitLargeUtf8(data: Data, index: number): T['TValue'] | null; visitBinary(data: Data, index: number): T['TValue'] | null; + visitLargeBinary(data: Data, index: number): T['TValue'] | null; visitFixedSizeBinary(data: Data, index: number): T['TValue'] | null; visitDate(data: Data, index: number): T['TValue'] | null; visitDateDay(data: Data, index: number): T['TValue'] | null; @@ -151,7 +152,7 @@ const getBigInts = ({ values }: Data, index: number): T[ const getFixedSizeBinary = ({ stride, values }: Data, index: number): T['TValue'] => values.subarray(stride * index, stride * (index + 1)); /** @ignore */ -const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); +const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); /** @ignore */ const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { const bytes = getVariableWidthBytes(values, valueOffsets, index); @@ -332,6 +333,7 @@ GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric); GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8); GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8); GetVisitor.prototype.visitBinary = wrapGet(getBinary); +GetVisitor.prototype.visitLargeBinary = wrapGet(getBinary); GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary); GetVisitor.prototype.visitDate = wrapGet(getDate); GetVisitor.prototype.visitDateDay = wrapGet(getDateDay); diff --git a/js/src/visitor/indexof.ts b/js/src/visitor/indexof.ts index 76f95788c7953..1e1cb87a9840e 100644 --- a/js/src/visitor/indexof.ts +++ b/js/src/visitor/indexof.ts @@ -24,7 +24,7 @@ import { getBool, BitIterator } from '../util/bit.js'; import { createElementComparator } from '../util/vector.js'; import { DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -59,6 +59,7 @@ export interface IndexOfVisitor extends Visitor { visitUtf8(data: Data, value: T['TValue'] | null, index?: number): number; visitLargeUtf8(data: Data, value: T['TValue'] | null, index?: number): number; visitBinary(data: Data, value: T['TValue'] | null, index?: number): number; + visitLargeBinary(data: Data, value: T['TValue'] | null, index?: number): number; visitFixedSizeBinary(data: Data, value: T['TValue'] | null, index?: number): number; visitDate(data: Data, value: T['TValue'] | null, index?: number): number; visitDateDay(data: Data, value: T['TValue'] | null, index?: number): number; @@ -175,6 +176,7 @@ IndexOfVisitor.prototype.visitFloat64 = indexOfValue; IndexOfVisitor.prototype.visitUtf8 = indexOfValue; IndexOfVisitor.prototype.visitLargeUtf8 = indexOfValue; IndexOfVisitor.prototype.visitBinary = indexOfValue; +IndexOfVisitor.prototype.visitLargeBinary = indexOfValue; IndexOfVisitor.prototype.visitFixedSizeBinary = indexOfValue; IndexOfVisitor.prototype.visitDate = indexOfValue; IndexOfVisitor.prototype.visitDateDay = indexOfValue; diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts index 09dfcb0b565ae..bf7e9d1591b40 100644 --- a/js/src/visitor/iterator.ts +++ b/js/src/visitor/iterator.ts @@ -21,7 +21,7 @@ import { Type, Precision } from '../enum.js'; import { TypeToDataType } from '../interfaces.js'; import { DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -57,6 +57,7 @@ export interface IteratorVisitor extends Visitor { visitUtf8(vector: Vector): IterableIterator; visitLargeUtf8(vector: Vector): IterableIterator; visitBinary(vector: Vector): IterableIterator; + visitLargeBinary(vector: Vector): IterableIterator; visitFixedSizeBinary(vector: Vector): IterableIterator; visitDate(vector: Vector): IterableIterator; visitDateDay(vector: Vector): IterableIterator; @@ -161,6 +162,7 @@ IteratorVisitor.prototype.visitFloat64 = vectorIterator; IteratorVisitor.prototype.visitUtf8 = vectorIterator; IteratorVisitor.prototype.visitLargeUtf8 = vectorIterator; IteratorVisitor.prototype.visitBinary = vectorIterator; +IteratorVisitor.prototype.visitLargeBinary = vectorIterator; IteratorVisitor.prototype.visitFixedSizeBinary = vectorIterator; IteratorVisitor.prototype.visitDate = vectorIterator; IteratorVisitor.prototype.visitDateDay = vectorIterator; diff --git a/js/src/visitor/jsontypeassembler.ts b/js/src/visitor/jsontypeassembler.ts index a6746a858ecb4..823b1dea104c8 100644 --- a/js/src/visitor/jsontypeassembler.ts +++ b/js/src/visitor/jsontypeassembler.ts @@ -42,6 +42,9 @@ export class JSONTypeAssembler extends Visitor { public visitBinary({ typeId }: T) { return { 'name': ArrowType[typeId].toLowerCase() }; } + public visitLargeBinary({ typeId }: T) { + return { 'name': ArrowType[typeId].toLowerCase() }; + } public visitBool({ typeId }: T) { return { 'name': ArrowType[typeId].toLowerCase() }; } diff --git a/js/src/visitor/jsonvectorassembler.ts b/js/src/visitor/jsonvectorassembler.ts index 9a3cb8601a434..88699d8f168c2 100644 --- a/js/src/visitor/jsonvectorassembler.ts +++ b/js/src/visitor/jsonvectorassembler.ts @@ -27,7 +27,7 @@ import { BitIterator, getBit, getBool } from '../util/bit.js'; import { DataType, Float, Int, Date_, Interval, Time, Timestamp, Union, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray, LargeUtf8, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray, } from '../type.js'; /** @ignore */ @@ -44,6 +44,7 @@ export interface JSONVectorAssembler extends Visitor { visitUtf8(data: Data): { DATA: string[]; OFFSET: number[] }; visitLargeUtf8(data: Data): { DATA: string[]; OFFSET: string[] }; visitBinary(data: Data): { DATA: string[]; OFFSET: number[] }; + visitLargeBinary(data: Data): { DATA: string[]; OFFSET: string[] }; visitFixedSizeBinary(data: Data): { DATA: string[] }; visitDate(data: Data): { DATA: number[] }; visitTimestamp(data: Data): { DATA: string[] }; @@ -105,7 +106,10 @@ export class JSONVectorAssembler extends Visitor { return { 'DATA': [...new Vector([data])], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] }; } public visitBinary(data: Data) { - return { 'DATA': [...binaryToString(new Vector([data]))], OFFSET: [...data.valueOffsets] }; + return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET': [...data.valueOffsets] }; + } + public visitLargeBinary(data: Data) { + return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] }; } public visitFixedSizeBinary(data: Data) { return { 'DATA': [...binaryToString(new Vector([data]))] }; @@ -168,7 +172,7 @@ export class JSONVectorAssembler extends Visitor { } /** @ignore */ -function* binaryToString(vector: Vector | Vector) { +function* binaryToString(vector: Vector | Vector | Vector) { for (const octets of vector as Iterable) { yield octets.reduce((str, byte) => { return `${str}${('0' + (byte & 0xFF).toString(16)).slice(-2)}`; diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index 15b0721660f55..eb1f280964c8e 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -26,7 +26,7 @@ import { float64ToUint16 } from '../util/math.js'; import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; import { DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -61,6 +61,7 @@ export interface SetVisitor extends Visitor { visitUtf8(data: Data, index: number, value: T['TValue']): void; visitLargeUtf8(data: Data, index: number, value: T['TValue']): void; visitBinary(data: Data, index: number, value: T['TValue']): void; + visitLargeBinary(data: Data, index: number, value: T['TValue']): void; visitFixedSizeBinary(data: Data, index: number, value: T['TValue']): void; visitDate(data: Data, index: number, value: T['TValue']): void; visitDateDay(data: Data, index: number, value: T['TValue']): void; @@ -165,11 +166,9 @@ export const setDateMillisecond = ({ values }: Data({ stride, values }: Data, index: number, value: T['TValue']): void => { values.set(value.subarray(0, stride), stride * index); }; /** @ignore */ -const setBinary = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value); +const setBinary = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value); /** @ignore */ -const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { - setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); -}; +const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); /* istanbul ignore next */ export const setDate = (data: Data, index: number, value: T['TValue']): void => { @@ -370,6 +369,7 @@ SetVisitor.prototype.visitFloat64 = wrapSet(setFloat); SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8); SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8); SetVisitor.prototype.visitBinary = wrapSet(setBinary); +SetVisitor.prototype.visitLargeBinary = wrapSet(setBinary); SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary); SetVisitor.prototype.visitDate = wrapSet(setDate); SetVisitor.prototype.visitDateDay = wrapSet(setDateDay); diff --git a/js/src/visitor/typeassembler.ts b/js/src/visitor/typeassembler.ts index f072714222739..169f3627a4002 100644 --- a/js/src/visitor/typeassembler.ts +++ b/js/src/visitor/typeassembler.ts @@ -25,6 +25,7 @@ import { Null } from '../fb/null.js'; import { Int } from '../fb/int.js'; import { FloatingPoint } from '../fb/floating-point.js'; import { Binary } from '../fb/binary.js'; +import { LargeBinary } from '../fb/large-binary.js'; import { Bool } from '../fb/bool.js'; import { Utf8 } from '../fb/utf8.js'; import { LargeUtf8 } from '../fb/large-utf8.js'; @@ -71,6 +72,10 @@ export class TypeAssembler extends Visitor { Binary.startBinary(b); return Binary.endBinary(b); } + public visitLargeBinary(_node: T, b: Builder) { + LargeBinary.startLargeBinary(b); + return LargeBinary.endLargeBinary(b); + } public visitBool(_node: T, b: Builder) { Bool.startBool(b); return Bool.endBool(b); diff --git a/js/src/visitor/typecomparator.ts b/js/src/visitor/typecomparator.ts index 2417dec09c6e9..a113f2ea31e8d 100644 --- a/js/src/visitor/typecomparator.ts +++ b/js/src/visitor/typecomparator.ts @@ -21,7 +21,7 @@ import { Visitor } from '../visitor.js'; import { Schema, Field } from '../schema.js'; import { DataType, TypeMap, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -55,6 +55,7 @@ export interface TypeComparator extends Visitor { visitUtf8(type: T, other?: DataType | null): other is T; visitLargeUtf8(type: T, other?: DataType | null): other is T; visitBinary(type: T, other?: DataType | null): other is T; + visitLargeBinary(type: T, other?: DataType | null): other is T; visitFixedSizeBinary(type: T, other?: DataType | null): other is T; visitDate(type: T, other?: DataType | null): other is T; visitDateDay(type: T, other?: DataType | null): other is T; @@ -252,6 +253,7 @@ TypeComparator.prototype.visitFloat64 = compareFloat; TypeComparator.prototype.visitUtf8 = compareAny; TypeComparator.prototype.visitLargeUtf8 = compareAny; TypeComparator.prototype.visitBinary = compareAny; +TypeComparator.prototype.visitLargeBinary = compareAny; TypeComparator.prototype.visitFixedSizeBinary = compareFixedSizeBinary; TypeComparator.prototype.visitDate = compareDate; TypeComparator.prototype.visitDateDay = compareDate; diff --git a/js/src/visitor/typector.ts b/js/src/visitor/typector.ts index 2e0bbc4147abb..a781b5fb14fcc 100644 --- a/js/src/visitor/typector.ts +++ b/js/src/visitor/typector.ts @@ -51,6 +51,7 @@ export class GetDataTypeConstructor extends Visitor { public visitUtf8() { return type.Utf8; } public visitLargeUtf8() { return type.LargeUtf8; } public visitBinary() { return type.Binary; } + public visitLargeBinary() { return type.LargeBinary; } public visitFixedSizeBinary() { return type.FixedSizeBinary; } public visitDate() { return type.Date_; } public visitDateDay() { return type.DateDay; } diff --git a/js/src/visitor/vectorassembler.ts b/js/src/visitor/vectorassembler.ts index df820e6f5e00c..7dc3695582dd7 100644 --- a/js/src/visitor/vectorassembler.ts +++ b/js/src/visitor/vectorassembler.ts @@ -27,7 +27,7 @@ import { BufferRegion, FieldNode } from '../ipc/metadata/message.js'; import { DataType, Dictionary, Float, Int, Date_, Interval, Time, Timestamp, Union, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, LargeUtf8, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, } from '../type.js'; import { bigIntToNumber } from '../util/bigint.js'; @@ -44,6 +44,7 @@ export interface VectorAssembler extends Visitor { visitUtf8(data: Data): this; visitLargeUtf8(data: Data): this; visitBinary(data: Data): this; + visitLargeBinary(data: Data): this; visitFixedSizeBinary(data: Data): this; visitDate(data: Data): this; visitTimestamp(data: Data): this; @@ -203,7 +204,7 @@ function assembleFlatVector(this: VectorAssembler, data: Data) { +function assembleFlatListVector(this: VectorAssembler, data: Data) { const { length, values, valueOffsets } = data; const begin = bigIntToNumber(valueOffsets[0]); const end = bigIntToNumber(valueOffsets[length]); @@ -239,6 +240,7 @@ VectorAssembler.prototype.visitFloat = assembleFlatVector; VectorAssembler.prototype.visitUtf8 = assembleFlatListVector; VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector; VectorAssembler.prototype.visitBinary = assembleFlatListVector; +VectorAssembler.prototype.visitLargeBinary = assembleFlatListVector; VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector; VectorAssembler.prototype.visitDate = assembleFlatVector; VectorAssembler.prototype.visitTimestamp = assembleFlatVector; diff --git a/js/src/visitor/vectorloader.ts b/js/src/visitor/vectorloader.ts index 35f28f49baada..c9c016d6b463c 100644 --- a/js/src/visitor/vectorloader.ts +++ b/js/src/visitor/vectorloader.ts @@ -77,6 +77,9 @@ export class VectorLoader extends Visitor { public visitBinary(type: T, { length, nullCount } = this.nextFieldNode()) { return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); } + public visitLargeBinary(type: T, { length, nullCount } = this.nextFieldNode()) { + return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); + } public visitFixedSizeBinary(type: T, { length, nullCount } = this.nextFieldNode()) { return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), data: this.readData(type) }); } @@ -169,7 +172,7 @@ export class JSONVectorLoader extends VectorLoader { return toArrayBufferView(Uint8Array, Int64.convertArray(sources[offset] as string[])); } else if (DataType.isDecimal(type)) { return toArrayBufferView(Uint8Array, Int128.convertArray(sources[offset] as string[])); - } else if (DataType.isBinary(type) || DataType.isFixedSizeBinary(type)) { + } else if (DataType.isBinary(type) || DataType.isLargeBinary(type) || DataType.isFixedSizeBinary(type)) { return binaryDataFromJSON(sources[offset] as string[]); } else if (DataType.isBool(type)) { return packBools(sources[offset] as number[]); diff --git a/js/test/data/tables.ts b/js/test/data/tables.ts index 449cfe1fb853a..89cf93eab585b 100644 --- a/js/test/data/tables.ts +++ b/js/test/data/tables.ts @@ -27,7 +27,7 @@ const nestedVectorGeneratorNames = ['struct', 'denseUnion', 'sparseUnion', 'map' const dictionaryKeyGeneratorNames = ['int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32']; const valueVectorGeneratorNames = [ 'null_', 'bool', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', - 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond', + 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary', 'largeBinary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond', 'timestampSecond', 'timestampMillisecond', 'timestampMicrosecond', 'timestampNanosecond', 'timeSecond', 'timeMillisecond', 'timeMicrosecond', 'timeNanosecond', 'decimal', 'dictionary', 'intervalDayTime', 'intervalYearMonth', diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 9d7b038331fe6..be248ad2c6ed8 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -25,7 +25,7 @@ import { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, Utf8, LargeUtf8, - Binary, + Binary, LargeBinary, FixedSizeBinary, Date_, DateDay, DateMillisecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, @@ -54,6 +54,7 @@ interface TestDataVectorGenerator extends Visitor { visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; + visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; @@ -78,6 +79,7 @@ interface TestDataVectorGenerator extends Visitor { visitUtf8: typeof generateUtf8; visitLargeUtf8: typeof generateLargeUtf8; visitBinary: typeof generateBinary; + visitLargeBinary: typeof generateLargeBinary; visitFixedSizeBinary: typeof generateFixedSizeBinary; visitDate: typeof generateDate; visitTimestamp: typeof generateTimestamp; @@ -104,6 +106,7 @@ TestDataVectorGenerator.prototype.visitFloat = generateFloat; TestDataVectorGenerator.prototype.visitUtf8 = generateUtf8; TestDataVectorGenerator.prototype.visitLargeUtf8 = generateLargeUtf8; TestDataVectorGenerator.prototype.visitBinary = generateBinary; +TestDataVectorGenerator.prototype.visitLargeBinary = generateLargeBinary; TestDataVectorGenerator.prototype.visitFixedSizeBinary = generateFixedSizeBinary; TestDataVectorGenerator.prototype.visitDate = generateDate; TestDataVectorGenerator.prototype.visitTimestamp = generateTimestamp; @@ -219,6 +222,7 @@ export const float64 = (length = 100, nullCount = Math.trunc(length * 0.2)) => v export const utf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Utf8(), length, nullCount); export const largeUtf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new LargeUtf8(), length, nullCount); export const binary = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Binary(), length, nullCount); +export const largeBinary = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new LargeBinary(), length, nullCount); export const fixedSizeBinary = (length = 100, nullCount = Math.trunc(length * 0.2), byteWidth = 8) => vectorGenerator.visit(new FixedSizeBinary(byteWidth), length, nullCount); export const dateDay = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new DateDay(), length, nullCount); export const dateMillisecond = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new DateMillisecond(), length, nullCount); @@ -246,7 +250,7 @@ export const fixedSizeList = (length = 100, nullCount = Math.trunc(length * 0.2) export const map = (length = 100, nullCount = Math.trunc(length * 0.2), child: Field> = defaultMapChild()) => vectorGenerator.visit(new Map_(child), length, nullCount); export const vecs = { - null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, largeUtf8, binary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond, durationMillisecond, durationMicrosecond, durationNanosecond + null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, largeUtf8, binary, largeBinary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond, durationMillisecond, durationMicrosecond, durationNanosecond } as { [k: string]: (...args: any[]) => any }; function generateNull(this: TestDataVectorGenerator, type: T, length = 100): GeneratedVector { @@ -368,6 +372,16 @@ function generateBinary(this: TestDataVectorGenerator, type: T return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, valueOffsets, data })]) }; } +function generateLargeBinary(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { + const nullBitmap = createBitmap(length, nullCount); + const valueOffsets = createVariableWidthOffsets64(length, nullBitmap, 10, 20, nullCount != 0); + const values = [...valueOffsets.slice(1)] + .map((o, i) => isValid(nullBitmap, i) ? o - valueOffsets[i] : null) + .map((length) => length == null ? null : randomBytes(Number(length))); + const data = createVariableWidthBytes(length, nullBitmap, valueOffsets, (i) => values[i]!); + return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, valueOffsets, data })]) }; +} + function generateFixedSizeBinary(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { const nullBitmap = createBitmap(length, nullCount); const data = fillRandom(Uint8Array, length * type.byteWidth); diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts index 0137c7aa66635..4d1be9b225b08 100644 --- a/js/test/unit/builders/builder-tests.ts +++ b/js/test/unit/builders/builder-tests.ts @@ -46,6 +46,7 @@ describe('Generated Test Data', () => { describe('Utf8Builder', () => { validateBuilder(generate.utf8); }); describe('LargeUtf8Builder', () => { validateBuilder(generate.largeUtf8); }); describe('BinaryBuilder', () => { validateBuilder(generate.binary); }); + describe('LargeBinaryBuilder', () => { validateBuilder(generate.largeBinary); }); describe('FixedSizeBinaryBuilder', () => { validateBuilder(generate.fixedSizeBinary); }); describe('DateDayBuilder', () => { validateBuilder(generate.dateDay); }); describe('DateMillisecondBuilder', () => { validateBuilder(generate.dateMillisecond); }); diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index 0a06bcbab8ee0..1e26e74730a2d 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -40,6 +40,7 @@ describe('Generated Test Data', () => { describe('Utf8', () => { validateVector(generate.utf8()); }); describe('LargeUtf8', () => { validateVector(generate.largeUtf8()); }); describe('Binary', () => { validateVector(generate.binary()); }); + describe('LargeBinary', () => { validateVector(generate.largeBinary()); }); describe('FixedSizeBinary', () => { validateVector(generate.fixedSizeBinary()); }); describe('DateDay', () => { validateVector(generate.dateDay()); }); describe('DateMillisecond', () => { validateVector(generate.dateMillisecond()); }); diff --git a/js/test/unit/visitor-tests.ts b/js/test/unit/visitor-tests.ts index f78adc59f8e98..6ecb6cca33ed5 100644 --- a/js/test/unit/visitor-tests.ts +++ b/js/test/unit/visitor-tests.ts @@ -18,7 +18,7 @@ import { Field, Visitor, DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -38,6 +38,7 @@ class BasicVisitor extends Visitor { public visitUtf8(type: T) { return (this.type = type); } public visitLargeUtf8(type: T) { return (this.type = type); } public visitBinary(type: T) { return (this.type = type); } + public visitLargeBinary(type: T) { return (this.type = type); } public visitFixedSizeBinary(type: T) { return (this.type = type); } public visitDate(type: T) { return (this.type = type); } public visitTimestamp(type: T) { return (this.type = type); } @@ -71,6 +72,7 @@ class FeatureVisitor extends Visitor { public visitUtf8(type: T) { return (this.type = type); } public visitLargeUtf8(type: T) { return (this.type = type); } public visitBinary(type: T) { return (this.type = type); } + public visitLargeBinary(type: T) { return (this.type = type); } public visitFixedSizeBinary(type: T) { return (this.type = type); } public visitDateDay(type: T) { return (this.type = type); } public visitDateMillisecond(type: T) { return (this.type = type); } @@ -108,6 +110,7 @@ describe('Visitor', () => { test(`visits Utf8 types`, () => validateBasicVisitor(new Utf8())); test(`visits LargeUtf8 types`, () => validateBasicVisitor(new LargeUtf8())); test(`visits Binary types`, () => validateBasicVisitor(new Binary())); + test(`visits LargeBinary types`, () => validateBasicVisitor(new LargeBinary())); test(`visits FixedSizeBinary types`, () => validateBasicVisitor(new FixedSizeBinary(128))); test(`visits Date types`, () => validateBasicVisitor(new Date_(0))); test(`visits Timestamp types`, () => validateBasicVisitor(new Timestamp(0, 'UTC'))); @@ -149,6 +152,7 @@ describe('Visitor', () => { test(`visits Utf8 types`, () => validateFeatureVisitor(new Utf8())); test(`visits LargeUtf8 types`, () => validateFeatureVisitor(new LargeUtf8())); test(`visits Binary types`, () => validateFeatureVisitor(new Binary())); + test(`visits LargeBinary types`, () => validateFeatureVisitor(new LargeBinary())); test(`visits FixedSizeBinary types`, () => validateFeatureVisitor(new FixedSizeBinary(128))); test(`visits DateDay types`, () => validateFeatureVisitor(new DateDay())); test(`visits DateMillisecond types`, () => validateFeatureVisitor(new DateMillisecond()));