From 64b5f3c6e1a8c6e8ebda6d1ec75819593d3cacd1 Mon Sep 17 00:00:00 2001 From: KAZUYUKI TANIMURA Date: Thu, 18 Jul 2024 12:38:58 -0700 Subject: [PATCH] feat: Spark-4.0 widening type support (#604) ## Rationale for this change To be ready for Spark 4.0 ## What changes are included in this PR? This PR adds type widening feature support introduced in Spark-4.0 ## How are these changes tested? Enabled ParquetTypeWideningSuite --- .../java/org/apache/comet/parquet/Native.java | 1 + .../org/apache/comet/parquet/TypeUtil.java | 43 ++- .../java/org/apache/comet/parquet/Utils.java | 15 +- dev/diffs/4.0.0-preview1.diff | 49 +-- native/core/benches/parquet_read.rs | 4 +- native/core/src/parquet/data_type.rs | 5 + native/core/src/parquet/mod.rs | 9 +- native/core/src/parquet/read/column.rs | 176 ++++++++++- native/core/src/parquet/read/levels.rs | 4 +- native/core/src/parquet/read/values.rs | 282 +++++++++++++----- native/core/src/parquet/util/jni.rs | 12 +- 11 files changed, 484 insertions(+), 116 deletions(-) diff --git a/common/src/main/java/org/apache/comet/parquet/Native.java b/common/src/main/java/org/apache/comet/parquet/Native.java index b40e27e73..f4820fedf 100644 --- a/common/src/main/java/org/apache/comet/parquet/Native.java +++ b/common/src/main/java/org/apache/comet/parquet/Native.java @@ -70,6 +70,7 @@ public static native long initColumnReader( int maxDl, int maxRl, int bitWidth, + int expectedBitWidth, boolean isSigned, int typeLength, int precision, diff --git a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java index bfbb7d0d2..0ec5042c7 100644 --- a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java +++ b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java @@ -29,6 +29,7 @@ import org.apache.parquet.schema.Types; import org.apache.spark.package$; import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException; +import 
org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.*; import org.apache.comet.CometConf; @@ -57,7 +58,9 @@ public static ColumnDescriptor convertToParquet(StructField field) { if (type == DataTypes.BooleanType || type == DataTypes.NullType) { builder = Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition); } else if (type == DataTypes.IntegerType || type instanceof YearMonthIntervalType) { - builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition); + builder = + Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) + .as(LogicalTypeAnnotation.intType(32, true)); } else if (type == DataTypes.DateType) { builder = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition) @@ -148,6 +151,12 @@ && isUnsignedIntTypeMatched(logicalTypeAnnotation, 32)) { return; } else if (sparkType instanceof YearMonthIntervalType) { return; + } else if (sparkType == DataTypes.DoubleType && isSpark40Plus()) { + return; + } else if (sparkType == TimestampNTZType$.MODULE$ + && isSpark40Plus() + && logicalTypeAnnotation instanceof DateLogicalTypeAnnotation) { + return; } break; case INT64: @@ -159,11 +168,13 @@ && isUnsignedIntTypeMatched(logicalTypeAnnotation, 64)) { // For unsigned int64, it stores as plain signed int64 in Parquet when dictionary // fallbacks. We read them as decimal values. 
return; - } else if (isTimestampTypeMatched(logicalTypeAnnotation, TimeUnit.MICROS)) { + } else if (isTimestampTypeMatched(logicalTypeAnnotation, TimeUnit.MICROS) + && (sparkType == TimestampNTZType$.MODULE$ || sparkType == DataTypes.TimestampType)) { validateTimestampType(logicalTypeAnnotation, sparkType); // TODO: use dateTimeRebaseMode from Spark side return; - } else if (isTimestampTypeMatched(logicalTypeAnnotation, TimeUnit.MILLIS)) { + } else if (isTimestampTypeMatched(logicalTypeAnnotation, TimeUnit.MILLIS) + && (sparkType == TimestampNTZType$.MODULE$ || sparkType == DataTypes.TimestampType)) { validateTimestampType(logicalTypeAnnotation, sparkType); return; } @@ -266,9 +277,29 @@ private static boolean isDecimalTypeMatched(ColumnDescriptor descriptor, DataTyp DecimalLogicalTypeAnnotation decimalType = (DecimalLogicalTypeAnnotation) typeAnnotation; // It's OK if the required decimal precision is larger than or equal to the physical decimal // precision in the Parquet metadata, as long as the decimal scale is the same. 
- return decimalType.getPrecision() <= d.precision() - && (decimalType.getScale() == d.scale() - || (isSpark40Plus() && decimalType.getScale() <= d.scale())); + return (decimalType.getPrecision() <= d.precision() && decimalType.getScale() == d.scale()) + || (isSpark40Plus() + && (!SQLConf.get().parquetVectorizedReaderEnabled() + || (decimalType.getScale() <= d.scale() + && decimalType.getPrecision() - decimalType.getScale() + <= d.precision() - d.scale()))); + } else if (isSpark40Plus()) { + boolean isNullTypeAnnotation = typeAnnotation == null; + boolean isIntTypeAnnotation = typeAnnotation instanceof IntLogicalTypeAnnotation; + if (!SQLConf.get().parquetVectorizedReaderEnabled()) { + return isNullTypeAnnotation || isIntTypeAnnotation; + } else if (isNullTypeAnnotation + || (isIntTypeAnnotation && ((IntLogicalTypeAnnotation) typeAnnotation).isSigned())) { + PrimitiveType.PrimitiveTypeName typeName = + descriptor.getPrimitiveType().getPrimitiveTypeName(); + int integerPrecision = d.precision() - d.scale(); + switch (typeName) { + case INT32: + return integerPrecision >= DecimalType$.MODULE$.IntDecimal().precision(); + case INT64: + return integerPrecision >= DecimalType$.MODULE$.LongDecimal().precision(); + } + } } return false; } diff --git a/common/src/main/java/org/apache/comet/parquet/Utils.java b/common/src/main/java/org/apache/comet/parquet/Utils.java index 2d4b83a67..f73251e27 100644 --- a/common/src/main/java/org/apache/comet/parquet/Utils.java +++ b/common/src/main/java/org/apache/comet/parquet/Utils.java @@ -115,7 +115,7 @@ public static long initColumnReader( promotionInfo = new TypePromotionInfo(readType); } else { // If type promotion is not enable, we'll just use the Parquet primitive type and precision. 
- promotionInfo = new TypePromotionInfo(primitiveTypeId, precision, scale); + promotionInfo = new TypePromotionInfo(primitiveTypeId, precision, scale, bitWidth); } return Native.initColumnReader( @@ -126,6 +126,7 @@ public static long initColumnReader( descriptor.getMaxDefinitionLevel(), descriptor.getMaxRepetitionLevel(), bitWidth, + promotionInfo.bitWidth, isSigned, primitiveType.getTypeLength(), precision, @@ -147,11 +148,14 @@ static class TypePromotionInfo { int precision; // Decimal scale from the Spark read schema, or -1 if it's not decimal type. int scale; + // Integer bit width from the Spark read schema, or -1 if it's not integer type. + int bitWidth; - TypePromotionInfo(int physicalTypeId, int precision, int scale) { + TypePromotionInfo(int physicalTypeId, int precision, int scale, int bitWidth) { this.physicalTypeId = physicalTypeId; this.precision = precision; this.scale = scale; + this.bitWidth = bitWidth; } TypePromotionInfo(DataType sparkReadType) { @@ -164,15 +168,22 @@ static class TypePromotionInfo { LogicalTypeAnnotation annotation = primitiveType.getLogicalTypeAnnotation(); int precision = -1; int scale = -1; + int bitWidth = -1; if (annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalAnnotation = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) annotation; precision = decimalAnnotation.getPrecision(); scale = decimalAnnotation.getScale(); } + if (annotation instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) { + LogicalTypeAnnotation.IntLogicalTypeAnnotation intAnnotation = + (LogicalTypeAnnotation.IntLogicalTypeAnnotation) annotation; + bitWidth = intAnnotation.getBitWidth(); + } this.physicalTypeId = physicalTypeId; this.precision = precision; this.scale = scale; + this.bitWidth = bitWidth; } } diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff index abd6e9a04..dfd57ce8f 100644 --- a/dev/diffs/4.0.0-preview1.diff +++ 
b/dev/diffs/4.0.0-preview1.diff @@ -2012,28 +2012,39 @@ index 25f6af1cc33..37b40cb5524 100644 val e = testSchemaMismatch(dir.getCanonicalPath, vectorizedReaderEnabled = false) val expectedMessage = "Encountered error while reading file" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala -index 4bd35e0789b..6bfedb65078 100644 +index 4bd35e0789b..6544d86dbe0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala -@@ -24,7 +24,7 @@ import org.apache.parquet.format.converter.ParquetMetadataConverter - import org.apache.parquet.hadoop.{ParquetFileReader, ParquetOutputFormat} - - import org.apache.spark.SparkException --import org.apache.spark.sql.{DataFrame, QueryTest, Row} -+import org.apache.spark.sql.{DataFrame, IgnoreCometSuite, QueryTest, Row} - import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper - import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException - import org.apache.spark.sql.functions.col -@@ -38,7 +38,8 @@ class ParquetTypeWideningSuite - extends QueryTest - with ParquetTest - with SharedSparkSession -- with AdaptiveSparkPlanHelper { -+ with AdaptiveSparkPlanHelper -+ with IgnoreCometSuite { // TODO: https://github.com/apache/datafusion-comet/issues/551 - - import testImplicits._ +@@ -65,7 +65,9 @@ class ParquetTypeWideningSuite + withClue( + s"with dictionary encoding '$dictionaryEnabled' with timestamp rebase mode " + + s"'$timestampRebaseMode''") { +- withAllParquetWriters { ++ // TODO: Comet cannot read DELTA_BINARY_PACKED created by V2 writer ++ // https://github.com/apache/datafusion-comet/issues/574 ++ // withAllParquetWriters { + withTempDir { dir => 
+ val expected = + writeParquetFiles(dir, values, fromType, dictionaryEnabled, timestampRebaseMode) +@@ -86,7 +88,7 @@ class ParquetTypeWideningSuite + } + } + } +- } ++ // } + } + } +@@ -190,7 +192,8 @@ class ParquetTypeWideningSuite + (Seq("1", "2", Short.MinValue.toString), ShortType, DoubleType), + (Seq("1", "2", Int.MinValue.toString), IntegerType, DoubleType), + (Seq("1.23", "10.34"), FloatType, DoubleType), +- (Seq("2020-01-01", "2020-01-02", "1312-02-27"), DateType, TimestampNTZType) ++ // TODO: Comet cannot handle older than "1582-10-15" ++ (Seq("2020-01-01", "2020-01-02"/* , "1312-02-27" */), DateType, TimestampNTZType) + ) + } + test(s"parquet widening conversion $fromType -> $toType") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala index b8f3ea3c6f3..bbd44221288 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala diff --git a/native/core/benches/parquet_read.rs b/native/core/benches/parquet_read.rs index 32463c077..1f8178cd2 100644 --- a/native/core/benches/parquet_read.rs +++ b/native/core/benches/parquet_read.rs @@ -54,10 +54,10 @@ fn bench(c: &mut Criterion) { ); b.iter(|| { let cd = ColumnDescriptor::new(t.clone(), 0, 0, ColumnPath::from(Vec::new())); - let promition_info = TypePromotionInfo::new(PhysicalType::INT32, -1, -1); + let promotion_info = TypePromotionInfo::new(PhysicalType::INT32, -1, -1, 32); let mut column_reader = TestColumnReader::new( cd, - promition_info, + promotion_info, BATCH_SIZE, pages.clone(), expected_num_values, diff --git a/native/core/src/parquet/data_type.rs b/native/core/src/parquet/data_type.rs index 0fc960e9b..cd578e2a2 100644 --- a/native/core/src/parquet/data_type.rs +++ b/native/core/src/parquet/data_type.rs @@ -30,11 +30,15 @@ make_type!(BoolType); make_type!(Int8Type); make_type!(UInt8Type); make_type!(Int16Type); 
+make_type!(Int16ToDoubleType); make_type!(UInt16Type); make_type!(Int32Type); make_type!(Int32To64Type); +make_type!(Int32ToDecimal64Type); +make_type!(Int32ToDoubleType); make_type!(UInt32Type); make_type!(Int64Type); +make_type!(Int64ToDecimal64Type); make_type!(UInt64Type); make_type!(FloatType); make_type!(DoubleType); @@ -48,6 +52,7 @@ make_type!(FLBADecimal32Type); make_type!(FLBADecimal64Type); make_type!(FLBAType); make_type!(Int32DateType); +make_type!(Int32TimestampMicrosType); make_type!(Int64TimestampMillisType); make_type!(Int64TimestampMicrosType); make_type!(Int96TimestampMicrosType); diff --git a/native/core/src/parquet/mod.rs b/native/core/src/parquet/mod.rs index e6acaa26b..c523f843f 100644 --- a/native/core/src/parquet/mod.rs +++ b/native/core/src/parquet/mod.rs @@ -67,6 +67,7 @@ pub extern "system" fn Java_org_apache_comet_parquet_Native_initColumnReader( max_dl: jint, max_rl: jint, bit_width: jint, + read_bit_width: jint, is_signed: jboolean, type_length: jint, precision: jint, @@ -95,8 +96,12 @@ pub extern "system" fn Java_org_apache_comet_parquet_Native_initColumnReader( is_adjusted_utc, jni_path, )?; - let promotion_info = - TypePromotionInfo::new_from_jni(read_primitive_type, read_precision, read_scale); + let promotion_info = TypePromotionInfo::new_from_jni( + read_primitive_type, + read_precision, + read_scale, + read_bit_width, + ); let ctx = Context { column_reader: ColumnReader::get( desc, diff --git a/native/core/src/parquet/read/column.rs b/native/core/src/parquet/read/column.rs index 22bade6b3..feb342719 100644 --- a/native/core/src/parquet/read/column.rs +++ b/native/core/src/parquet/read/column.rs @@ -51,13 +51,18 @@ pub enum ColumnReader { Int8ColumnReader(TypedColumnReader), UInt8ColumnReader(TypedColumnReader), Int16ColumnReader(TypedColumnReader), + Int16ToDoubleColumnReader(TypedColumnReader), UInt16ColumnReader(TypedColumnReader), Int32ColumnReader(TypedColumnReader), Int32To64ColumnReader(TypedColumnReader), + 
Int32ToDecimal64ColumnReader(TypedColumnReader), + Int32ToDoubleColumnReader(TypedColumnReader), UInt32ColumnReader(TypedColumnReader), Int32DecimalColumnReader(TypedColumnReader), Int32DateColumnReader(TypedColumnReader), + Int32TimestampMicrosColumnReader(TypedColumnReader), Int64ColumnReader(TypedColumnReader), + Int64ToDecimal64ColumnReader(TypedColumnReader), UInt64DecimalColumnReader(TypedColumnReader), Int64DecimalColumnReader(TypedColumnReader), Int64TimestampMillisColumnReader(TypedColumnReader), @@ -124,19 +129,81 @@ impl ColumnReader { bit_width, is_signed, } => match (bit_width, is_signed) { - (8, true) => typed_reader!(Int8ColumnReader, Int8), + (8, true) => match promotion_info.physical_type { + PhysicalType::FIXED_LEN_BYTE_ARRAY => { + if promotion_info.precision <= DECIMAL_MAX_INT_DIGITS + && promotion_info.scale < 1 + { + typed_reader!(Int32ColumnReader, Int32) + } else if promotion_info.precision <= DECIMAL_MAX_LONG_DIGITS { + typed_reader!( + Int32ToDecimal64ColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } else { + typed_reader!( + Int32DecimalColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } + } + _ => typed_reader!(Int8ColumnReader, Int8), + }, (8, false) => typed_reader!(UInt8ColumnReader, Int16), - (16, true) => typed_reader!(Int16ColumnReader, Int16), + (16, true) => match promotion_info.physical_type { + PhysicalType::DOUBLE => { + typed_reader!(Int16ToDoubleColumnReader, Float64) + } + PhysicalType::INT32 if promotion_info.bit_width == 32 => { + typed_reader!(Int32ColumnReader, Int32) + } + PhysicalType::FIXED_LEN_BYTE_ARRAY => { + if promotion_info.precision <= DECIMAL_MAX_INT_DIGITS + && promotion_info.scale < 1 + { + typed_reader!(Int32ColumnReader, Int32) + } else if promotion_info.precision <= DECIMAL_MAX_LONG_DIGITS { + typed_reader!( + Int32ToDecimal64ColumnReader, + ArrowDataType::Decimal128( + 
promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } else { + typed_reader!( + Int32DecimalColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } + } + _ => typed_reader!(Int16ColumnReader, Int16), + }, (16, false) => typed_reader!(UInt16ColumnReader, Int32), - (32, true) => typed_reader!(Int32ColumnReader, Int32), + (32, true) => match promotion_info.physical_type { + PhysicalType::INT32 if promotion_info.bit_width == 16 => { + typed_reader!(Int16ColumnReader, Int16) + } + _ => typed_reader!(Int32ColumnReader, Int32), + }, (32, false) => typed_reader!(UInt32ColumnReader, Int64), _ => unimplemented!("Unsupported INT32 annotation: {:?}", lt), }, LogicalType::Decimal { - scale, + scale: _, precision: _, } => { - if use_decimal_128 || scale < &promotion_info.scale { + if use_decimal_128 || promotion_info.precision > DECIMAL_MAX_LONG_DIGITS + { typed_reader!( Int32DecimalColumnReader, ArrowDataType::Decimal128( @@ -145,17 +212,56 @@ impl ColumnReader { ) ) } else { - typed_reader!(Int32ColumnReader, Int32) + typed_reader!( + Int32ToDecimal64ColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) } } - LogicalType::Date => typed_reader!(Int32DateColumnReader, Date32), + LogicalType::Date => match promotion_info.physical_type { + PhysicalType::INT64 => typed_reader!( + Int32TimestampMicrosColumnReader, + ArrowDataType::Timestamp(TimeUnit::Microsecond, None) + ), + _ => typed_reader!(Int32DateColumnReader, Date32), + }, lt => unimplemented!("Unsupported logical type for INT32: {:?}", lt), } } else { // We support type promotion from int to long match promotion_info.physical_type { + PhysicalType::INT32 if promotion_info.bit_width == 16 => { + typed_reader!(Int16ColumnReader, Int16) + } PhysicalType::INT32 => typed_reader!(Int32ColumnReader, Int32), PhysicalType::INT64 => typed_reader!(Int32To64ColumnReader, Int64), + 
PhysicalType::DOUBLE => typed_reader!(Int32ToDoubleColumnReader, Float64), + PhysicalType::FIXED_LEN_BYTE_ARRAY => { + if promotion_info.precision <= DECIMAL_MAX_INT_DIGITS + && promotion_info.scale < 1 + { + typed_reader!(Int32ColumnReader, Int32) + } else if promotion_info.precision <= DECIMAL_MAX_LONG_DIGITS { + typed_reader!( + Int32ToDecimal64ColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } else { + typed_reader!( + Int32DecimalColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } + } t => unimplemented!("Unsupported read physical type for INT32: {}", t), } } @@ -175,10 +281,11 @@ impl ColumnReader { _ => panic!("Unsupported INT64 annotation: {:?}", lt), }, LogicalType::Decimal { - scale, + scale: _, precision: _, } => { - if use_decimal_128 || scale < &promotion_info.scale { + if use_decimal_128 || promotion_info.precision > DECIMAL_MAX_LONG_DIGITS + { typed_reader!( Int64DecimalColumnReader, ArrowDataType::Decimal128( @@ -187,7 +294,13 @@ impl ColumnReader { ) ) } else { - typed_reader!(Int64ColumnReader, Int64) + typed_reader!( + Int64ToDecimal64ColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) } } LogicalType::Timestamp { @@ -226,8 +339,25 @@ impl ColumnReader { lt => panic!("Unsupported logical type for INT64: {:?}", lt), } } else { - // By default it is INT(64, true) - typed_reader!(Int64ColumnReader, Int64) + match promotion_info.physical_type { + PhysicalType::FIXED_LEN_BYTE_ARRAY => { + if promotion_info.precision <= DECIMAL_MAX_LONG_DIGITS + && promotion_info.scale < 1 + { + typed_reader!(Int64ColumnReader, Int64) + } else { + typed_reader!( + Int64DecimalColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) + } + } + // By default it is INT(64, true) + _ => typed_reader!(Int64ColumnReader, Int64), 
+ } } } PhysicalType::INT96 => { @@ -266,8 +396,16 @@ impl ColumnReader { } => { if !use_decimal_128 && precision <= DECIMAL_MAX_INT_DIGITS { typed_reader!(FLBADecimal32ColumnReader, Int32) - } else if !use_decimal_128 && precision <= DECIMAL_MAX_LONG_DIGITS { - typed_reader!(FLBADecimal64ColumnReader, Int64) + } else if !use_decimal_128 + && promotion_info.precision <= DECIMAL_MAX_LONG_DIGITS + { + typed_reader!( + FLBADecimal64ColumnReader, + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) + ) } else { typed_reader!( FLBADecimalColumnReader, @@ -306,13 +444,18 @@ macro_rules! make_func { Self::Int8ColumnReader(ref typed) => typed.$func($($args),*), Self::UInt8ColumnReader(ref typed) => typed.$func($($args),*), Self::Int16ColumnReader(ref typed) => typed.$func($($args),*), + Self::Int16ToDoubleColumnReader(ref typed) => typed.$func($($args), *), Self::UInt16ColumnReader(ref typed) => typed.$func($($args),*), Self::Int32ColumnReader(ref typed) => typed.$func($($args),*), Self::Int32To64ColumnReader(ref typed) => typed.$func($($args), *), + Self::Int32ToDecimal64ColumnReader(ref typed) => typed.$func($($args), *), + Self::Int32ToDoubleColumnReader(ref typed) => typed.$func($($args), *), Self::UInt32ColumnReader(ref typed) => typed.$func($($args),*), Self::Int32DateColumnReader(ref typed) => typed.$func($($args),*), Self::Int32DecimalColumnReader(ref typed) => typed.$func($($args),*), + Self::Int32TimestampMicrosColumnReader(ref typed) => typed.$func($($args),*), Self::Int64ColumnReader(ref typed) => typed.$func($($args),*), + Self::Int64ToDecimal64ColumnReader(ref typed) => typed.$func($($args), *), Self::UInt64DecimalColumnReader(ref typed) => typed.$func($($args),*), Self::Int64DecimalColumnReader(ref typed) => typed.$func($($args),*), Self::Int64TimestampMillisColumnReader(ref typed) => typed.$func($($args),*), @@ -339,13 +482,18 @@ macro_rules! 
make_func_mut { Self::Int8ColumnReader(ref mut typed) => typed.$func($($args),*), Self::UInt8ColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int16ColumnReader(ref mut typed) => typed.$func($($args),*), + Self::Int16ToDoubleColumnReader(ref mut typed) => typed.$func($($args), *), Self::UInt16ColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int32ColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int32To64ColumnReader(ref mut typed) => typed.$func($($args), *), + Self::Int32ToDecimal64ColumnReader(ref mut typed) => typed.$func($($args), *), + Self::Int32ToDoubleColumnReader(ref mut typed) => typed.$func($($args), *), Self::UInt32ColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int32DateColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int32DecimalColumnReader(ref mut typed) => typed.$func($($args),*), + Self::Int32TimestampMicrosColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int64ColumnReader(ref mut typed) => typed.$func($($args),*), + Self::Int64ToDecimal64ColumnReader(ref mut typed) => typed.$func($($args), *), Self::UInt64DecimalColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int64DecimalColumnReader(ref mut typed) => typed.$func($($args),*), Self::Int64TimestampMillisColumnReader(ref mut typed) => typed.$func($($args),*), diff --git a/native/core/src/parquet/read/levels.rs b/native/core/src/parquet/read/levels.rs index d43c28684..303db54c8 100644 --- a/native/core/src/parquet/read/levels.rs +++ b/native/core/src/parquet/read/levels.rs @@ -121,8 +121,8 @@ impl LevelDecoder { match self.mode { Mode::RLE => { if self.current_value as i16 == max_def_level { - value_decoder.read_batch(vector, n); bit::set_bits(vector.validity_buffer.as_slice_mut(), vector.num_values, n); + value_decoder.read_batch(vector, n); vector.num_values += n; } else { vector.put_nulls(n); @@ -132,8 +132,8 @@ impl LevelDecoder { for i in 0..n { if self.current_buffer[self.current_buffer_idx + i] == 
max_def_level as i32 { - value_decoder.read(vector); bit::set_bit(vector.validity_buffer.as_slice_mut(), vector.num_values); + value_decoder.read(vector); vector.num_values += 1; } else { vector.put_null(); diff --git a/native/core/src/parquet/read/values.rs b/native/core/src/parquet/read/values.rs index ebed5f95b..8eae330a5 100644 --- a/native/core/src/parquet/read/values.rs +++ b/native/core/src/parquet/read/values.rs @@ -23,6 +23,8 @@ use log::debug; use parquet::{basic::Encoding, schema::types::ColumnDescPtr}; use super::{PlainDecoderInner, PlainDecoding, PlainDictDecoding, ReadOptions}; +use crate::write_null; +use crate::write_val_or_null; use crate::{ common::bit::{self, BitReader}, parquet::{data_type::*, read::DECIMAL_BYTE_WIDTH, ParquetMutableVector}, @@ -180,63 +182,38 @@ macro_rules! make_plain_dict_impl { } make_plain_dict_impl! { Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type, UInt32Type } -make_plain_dict_impl! { Int32DateType, Int64Type, FloatType, FLBAType } +make_plain_dict_impl! { Int32DateType, Int64Type, FloatType, Int64ToDecimal64Type, FLBAType } make_plain_dict_impl! { DoubleType, Int64TimestampMillisType, Int64TimestampMicrosType } -impl PlainDictDecoding for Int32To64Type { - fn decode_dict_one( - idx: usize, - val_idx: usize, - src: &ParquetMutableVector, - dst: &mut ParquetMutableVector, - _: usize, - ) { - let src_ptr = src.value_buffer.as_ptr() as *const i32; - let dst_ptr = dst.value_buffer.as_mut_ptr() as *mut i64; - unsafe { - dst_ptr - .add(idx) - .write_unaligned(src_ptr.add(val_idx).read_unaligned() as i64); - } - } -} - -impl PlainDecoding for FloatToDoubleType { - fn decode(src: &mut PlainDecoderInner, dst: &mut ParquetMutableVector, num: usize) { - let src_ptr = src.data.as_ptr() as *const f32; - let dst_ptr = dst.value_buffer.as_mut_ptr() as *mut f64; - unsafe { - for i in 0..num { - dst_ptr - .add(dst.num_values + i) - .write_unaligned(src_ptr.add(src.offset + i).read_unaligned() as f64); +macro_rules! 
make_int_variant_dict_impl { + ($ty:ty, $src_ty:ty, $dst_ty:ty) => { + impl PlainDictDecoding for $ty { + fn decode_dict_one( + idx: usize, + val_idx: usize, + src: &ParquetMutableVector, + dst: &mut ParquetMutableVector, + _: usize, + ) { + let src_ptr = src.value_buffer.as_ptr() as *const $src_ty; + let dst_ptr = dst.value_buffer.as_mut_ptr() as *mut $dst_ty; + unsafe { + // SAFETY the caller must ensure `idx`th pointer is in bounds + dst_ptr + .add(idx) + .write_unaligned(src_ptr.add(val_idx).read_unaligned() as $dst_ty); + } } } - src.offset += 4 * num; - } - - fn skip(src: &mut PlainDecoderInner, num: usize) { - src.offset += 4 * num; - } + }; } -impl PlainDictDecoding for FloatToDoubleType { - fn decode_dict_one( - idx: usize, - val_idx: usize, - src: &ParquetMutableVector, - dst: &mut ParquetMutableVector, - _: usize, - ) { - let src_ptr = src.value_buffer.as_ptr() as *const f32; - let dst_ptr = dst.value_buffer.as_mut_ptr() as *mut f64; - unsafe { - dst_ptr - .add(idx) - .write_unaligned(src_ptr.add(val_idx).read_unaligned() as f64); - } - } -} +make_int_variant_dict_impl!(Int16ToDoubleType, i16, f64); +make_int_variant_dict_impl!(Int32To64Type, i32, i64); +make_int_variant_dict_impl!(Int32ToDecimal64Type, i32, i64); +make_int_variant_dict_impl!(Int32ToDoubleType, i32, f64); +make_int_variant_dict_impl!(Int32TimestampMicrosType, i32, i64); +make_int_variant_dict_impl!(FloatToDoubleType, f32, f64); impl PlainDecoding for Int32DateType { fn decode(src: &mut PlainDecoderInner, dst: &mut ParquetMutableVector, num: usize) { @@ -259,7 +236,7 @@ impl PlainDecoding for Int32DateType { if unlikely(v.read_unaligned() < JULIAN_GREGORIAN_SWITCH_OFF_DAY) { panic!( "Encountered date value {}, which is before 1582-10-15 (counting backwards \ - from Unix eopch date 1970-01-01), and could be ambigous depending on \ + from Unix epoch date 1970-01-01), and could be ambigous depending on \ whether a legacy Julian/Gregorian hybrid calendar is used, or a Proleptic \ Gregorian 
calendar is used.", *v @@ -285,6 +262,57 @@ impl PlainDecoding for Int32DateType { } } +impl PlainDecoding for Int32TimestampMicrosType { + #[inline] + fn decode(src: &mut PlainDecoderInner, dst: &mut ParquetMutableVector, num: usize) { + let src_data = &src.data; + let byte_width = src.desc.type_length() as usize; + let num_bytes = byte_width * num; + + { + let mut offset = src.offset; + for _ in 0..num { + let v = src_data[offset..offset + byte_width].as_ptr() as *const i32; + + // TODO: optimize this further as checking value one by one is not very efficient + unsafe { + if unlikely(v.read_unaligned() < JULIAN_GREGORIAN_SWITCH_OFF_DAY) { + panic!( + "Encountered timestamp value {}, which is before 1582-10-15 (counting \ + backwards from Unix epoch date 1970-01-01), and could be ambigous \ + depending on whether a legacy Julian/Gregorian hybrid calendar is used, \ + or a Proleptic Gregorian calendar is used.", + *v + ); + } + } + + offset += byte_width; + } + } + + let mut offset = src.offset; + let dst_byte_width = byte_width * 2; + let mut dst_offset = dst_byte_width * dst.num_values; + for _ in 0..num { + let v = src_data[offset..offset + byte_width].as_ptr() as *const i32; + let v = unsafe { v.read_unaligned() }; + let v = (v as i64).wrapping_mul(MICROS_PER_DAY); + bit::memcpy_value(&v, dst_byte_width, &mut dst.value_buffer[dst_offset..]); + offset += byte_width; + dst_offset += dst_byte_width; + } + + src.offset += num_bytes; + } + + #[inline] + fn skip(src: &mut PlainDecoderInner, num: usize) { + let num_bytes = src.desc.type_length() as usize * num; + src.offset += num_bytes; + } +} + impl PlainDecoding for Int64TimestampMillisType { #[inline] fn decode(src: &mut PlainDecoderInner, dst: &mut ParquetMutableVector, num: usize) { @@ -305,7 +333,7 @@ impl PlainDecoding for Int64TimestampMillisType { if unlikely(v < JULIAN_GREGORIAN_SWITCH_OFF_TS) { panic!( "Encountered timestamp value {}, which is before 1582-10-15 (counting \ - backwards from Unix eopch date 
1970-01-01), and could be ambigous \ + backwards from Unix epoch date 1970-01-01), and could be ambigous \ depending on whether a legacy Julian/Gregorian hybrid calendar is used, \ or a Proleptic Gregorian calendar is used.", v @@ -360,7 +388,7 @@ impl PlainDecoding for Int64TimestampMicrosType { if unlikely(v.read_unaligned() < JULIAN_GREGORIAN_SWITCH_OFF_TS) { panic!( "Encountered timestamp value {}, which is before 1582-10-15 (counting \ - backwards from Unix eopch date 1970-01-01), and could be ambigous \ + backwards from Unix epoch date 1970-01-01), and could be ambigous \ depending on whether a legacy Julian/Gregorian hybrid calendar is used, \ or a Proleptic Gregorian calendar is used.", *v @@ -439,7 +467,10 @@ macro_rules! make_int_variant_impl { make_int_variant_impl!(Int8Type, copy_i32_to_i8, 1); make_int_variant_impl!(Int16Type, copy_i32_to_i16, 2); -make_int_variant_impl!(Int32To64Type, copy_i32_to_i64, 4); +make_int_variant_impl!(Int16ToDoubleType, copy_i32_to_f64, 8); // Parquet uses Int16 using 4 bytes +make_int_variant_impl!(Int32To64Type, copy_i32_to_i64, 8); +make_int_variant_impl!(Int32ToDoubleType, copy_i32_to_f64, 8); +make_int_variant_impl!(FloatToDoubleType, copy_f32_to_f64, 8); // unsigned type require double the width and zeroes are written for the second half // perhaps because they are implemented as the next size up signed type? @@ -447,6 +478,81 @@ make_int_variant_impl!(UInt8Type, copy_i32_to_u8, 2); make_int_variant_impl!(UInt16Type, copy_i32_to_u16, 4); make_int_variant_impl!(UInt32Type, copy_i32_to_u32, 8); +macro_rules! 
make_int_decimal_variant_impl { + ($ty:ty, $copy_fn:ident, $type_width:expr, $dst_type:ty) => { + impl PlainDecoding for $ty { + fn decode(src: &mut PlainDecoderInner, dst: &mut ParquetMutableVector, num: usize) { + let dst_slice = dst.value_buffer.as_slice_mut(); + let dst_offset = dst.num_values * 8; + $copy_fn(&src.data[src.offset..], &mut dst_slice[dst_offset..], num); + + let src_precision = src.desc.type_precision() as u32; + let src_scale = ::std::cmp::max(src.desc.type_scale(), 0) as u32; + let (dst_precision, dst_scale) = match dst.arrow_type { + ArrowDataType::Decimal128(p, s) if s >= 0 => (p as u32, s as u32), + _ => unreachable!(), + }; + let upper = (10 as $dst_type).pow(dst_precision); + let v = dst_slice[dst_offset..].as_mut_ptr() as *mut $dst_type; + if dst_scale > src_scale { + let mul = (10 as $dst_type).pow(dst_scale - src_scale); + for i in 0..num { + unsafe { + // SAFETY the caller must ensure `i`th pointer is in bounds + let v = v.add(i); + write_val_or_null!(v, v.read_unaligned() * mul, upper, dst, i); + } + } + } else if dst_scale < src_scale { + let div = (10 as $dst_type).pow(src_scale - dst_scale); + for i in 0..num { + unsafe { + // SAFETY the caller must ensure `i`th pointer is in bounds + let v = v.add(i); + write_val_or_null!(v, v.read_unaligned() / div, upper, dst, i); + } + } + } else if src_precision > dst_precision { + for i in 0..num { + unsafe { + // SAFETY the caller must ensure `i`th pointer is in bounds + let v = v.add(i); + write_null!(v.read_unaligned(), upper, dst, i); + } + } + } + + src.offset += $type_width * num; + } + + fn skip(src: &mut PlainDecoderInner, num: usize) { + src.offset += $type_width * num; + } + } + }; +} +make_int_decimal_variant_impl!(Int32ToDecimal64Type, copy_i32_to_i64, 4, i64); +make_int_decimal_variant_impl!(Int64ToDecimal64Type, copy_i64_to_i64, 8, i64); + +#[macro_export] +macro_rules! 
write_val_or_null { + ($v: expr, $adjusted: expr, $upper: expr, $dst: expr, $i: expr) => { + let adjusted = $adjusted; + $v.write_unaligned(adjusted); + write_null!(adjusted, $upper, $dst, $i); + }; +} + +#[macro_export] +macro_rules! write_null { + ($val: expr, $upper: expr, $dst: expr, $i: expr) => { + if $upper <= $val { + bit::unset_bit($dst.validity_buffer.as_slice_mut(), $dst.num_values + $i); + $dst.num_nulls += 1; + } + }; +} + macro_rules! generate_cast_to_unsigned { ($name: ident, $src_type:ty, $dst_type:ty, $zero_value:expr) => { pub fn $name(src: &[u8], dst: &mut [u8], num: usize) { @@ -506,6 +612,21 @@ macro_rules! generate_cast_to_signed { generate_cast_to_signed!(copy_i32_to_i8, i32, i8); generate_cast_to_signed!(copy_i32_to_i16, i32, i16); generate_cast_to_signed!(copy_i32_to_i64, i32, i64); +generate_cast_to_signed!(copy_i32_to_f64, i32, f64); +generate_cast_to_signed!(copy_f32_to_f64, f32, f64); + +fn copy_i64_to_i64(src: &[u8], dst: &mut [u8], num: usize) { + debug_assert!( + src.len() >= num * std::mem::size_of::<i64>(), + "Source slice is too small" + ); + debug_assert!( + dst.len() >= num * std::mem::size_of::<i64>(), + "Destination slice is too small" + ); + + bit::memcpy_value(src, std::mem::size_of::<i64>() * num, dst); +} // Shared implementation for variants of Binary type macro_rules! make_plain_binary_impl { @@ -652,13 +773,16 @@ macro_rules!
make_plain_decimal_impl { debug_assert!(byte_width <= DECIMAL_BYTE_WIDTH); - let src_scale = src.desc.type_scale() as u32; - let dst_scale = match dst.arrow_type { - ArrowDataType::Decimal128(_percision, scale) => scale as u32, - _ => unreachable!() + let src_precision = src.desc.type_precision() as u32; + let src_scale = ::std::cmp::max(src.desc.type_scale(), 0) as u32; + let (dst_precision, dst_scale) = match dst.arrow_type { + ArrowDataType::Decimal128(p, s) if s >= 0 => (p as u32, s as u32), + _ => unreachable!(), }; + let upper = 10_i128.pow(dst_precision); + let mul_div = 10_i128.pow(dst_scale.abs_diff(src_scale)); - for _ in 0..num { + for i in 0..num { let s = &mut dst_data[dst_offset..]; bit::memcpy( @@ -682,12 +806,20 @@ macro_rules! make_plain_decimal_impl { } if dst_scale > src_scale { - let exp = dst_scale - src_scale; - let mul = 10_i128.pow(exp); let v = s.as_mut_ptr() as *mut i128; unsafe { - v.write_unaligned(v.read_unaligned() * mul); + // SAFETY the caller must ensure `i`th pointer is in bounds + write_val_or_null!(v, v.read_unaligned() * mul_div, upper, dst, i); + } + } else if dst_scale < src_scale { + let v = s.as_mut_ptr() as *mut i128; + unsafe { + // SAFETY the caller must ensure `i`th pointer is in bounds + write_val_or_null!(v, v.read_unaligned() / mul_div, upper, dst, i); } + } else if src_precision > dst_precision { + let v = s.as_mut_ptr() as *mut i128; + write_null!(unsafe { v.read_unaligned() }, upper, dst, i); } src_offset += byte_width; @@ -728,13 +860,22 @@ macro_rules! 
make_plain_decimal_int_impl { impl PlainDecoding for $ty { fn decode(src: &mut PlainDecoderInner, dst: &mut ParquetMutableVector, num: usize) { let byte_width = src.desc.type_length() as usize; - let num_bits = 8 * byte_width; + let num_bits = 64.min(8 * byte_width); let src_data = &src.data[src.offset..]; let dst_data = &mut dst.value_buffer[dst.num_values * $num_bytes..]; let mut src_offset = 0; + let src_precision = src.desc.type_precision() as u32; + let src_scale = ::std::cmp::max(src.desc.type_scale(), 0) as u32; + let (dst_precision, dst_scale) = match dst.arrow_type { + ArrowDataType::Decimal128(p, s) if s >= 0 => (p as u32, s as u32), + _ => (src_precision, src_scale), + }; + let upper = 10_i64.pow(dst_precision); + let mul_div = 10_i64.pow(dst_scale.abs_diff(src_scale)); + for i in 0..num { let mut unscaled: i64 = 0; for _ in 0..byte_width { @@ -742,8 +883,15 @@ macro_rules! make_plain_decimal_int_impl { src_offset += 1; } unscaled = (unscaled << (64 - num_bits)) >> (64 - num_bits); - bit::memcpy_value(&unscaled, $num_bytes, &mut dst_data[i * - $num_bytes..]); + if dst_scale > src_scale { + unscaled *= mul_div; + } else if dst_scale < src_scale { + unscaled /= mul_div; + } + bit::memcpy_value(&unscaled, $num_bytes, &mut dst_data[i * $num_bytes..]); + if src_precision > dst_precision { + write_null!(unscaled, upper, dst, i); + } } src.offset += num * byte_width; @@ -801,7 +949,7 @@ impl PlainDecoding for Int96TimestampMicrosType { if unlikely(micros < JULIAN_GREGORIAN_SWITCH_OFF_TS) { panic!( "Encountered timestamp value {}, which is before 1582-10-15 (counting \ - backwards from Unix eopch date 1970-01-01), and could be ambigous \ + backwards from Unix epoch date 1970-01-01), and could be ambigous \ depending on whether a legacy Julian/Gregorian hybrid calendar is used, \ or a Proleptic Gregorian calendar is used.", micros diff --git a/native/core/src/parquet/util/jni.rs b/native/core/src/parquet/util/jni.rs index cde9fff0f..bcd82c6cf 100644 --- 
a/native/core/src/parquet/util/jni.rs +++ b/native/core/src/parquet/util/jni.rs @@ -97,23 +97,31 @@ pub struct TypePromotionInfo { pub(crate) physical_type: PhysicalType, pub(crate) precision: i32, pub(crate) scale: i32, + pub(crate) bit_width: i32, } impl TypePromotionInfo { - pub fn new_from_jni(physical_type_id: jint, precision: jint, scale: jint) -> Self { + pub fn new_from_jni( + physical_type_id: jint, + precision: jint, + scale: jint, + bit_width: jint, + ) -> Self { let physical_type = convert_physical_type(physical_type_id); Self { physical_type, precision, scale, + bit_width, } } - pub fn new(physical_type: PhysicalType, precision: i32, scale: i32) -> Self { + pub fn new(physical_type: PhysicalType, precision: i32, scale: i32, bit_width: i32) -> Self { Self { physical_type, precision, scale, + bit_width, } } }