apache · baibaichen · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 10, 2026
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -1495,6 +1495,31 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   // The columns present in the table, if not available default to the baseSchema.
   auto tableSchema = splitInfo->tableSchema ? splitInfo->tableSchema : baseSchema;
 
+  // Build dataColumns from tableSchema, excluding partition columns.
+  // HiveTableHandle::dataColumns() is used as fileSchema for the reader.
+  // Partition columns should not be validated against the file's physical types
+  // (their values come from the partition path, not from the file).
+  std::unordered_set<std::string> partitionColNames;
+  for (size_t idx = 0; idx < colNameList.size(); ++idx) { // Unsigned index: matches size().
+    if (columnTypes[idx] == ColumnType::kPartitionKey) {
+      partitionColNames.insert(colNameList[idx]);
+    }
+  }
+  RowTypePtr dataColumns;
+  if (partitionColNames.empty()) {
+    dataColumns = tableSchema; // Nothing to strip: reuse the schema without copying it.
+  } else {
+    std::vector<std::string> dataColNames;
+    std::vector<TypePtr> dataColTypes;
+    for (uint32_t idx = 0; idx < tableSchema->size(); ++idx) {
+      if (partitionColNames.count(tableSchema->nameOf(idx)) == 0) { // Keep non-partition columns only.
+        dataColNames.push_back(tableSchema->nameOf(idx));
+        dataColTypes.push_back(tableSchema->childAt(idx));
+      }
+    }
+    dataColumns = ROW(std::move(dataColNames), std::move(dataColTypes));
+  }
+
   connector::ConnectorTableHandlePtr tableHandle;
   auto remainingFilter = readRel.has_filter() ? exprConverter_->toVeloxExpr(readRel.filter(), baseSchema) : nullptr;
   auto connectorId = kHiveConnectorId;
@@ -1506,7 +1531,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   }
   common::SubfieldFilters subfieldFilters;
   tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
-      connectorId, "hive_table", std::move(subfieldFilters), remainingFilter, tableSchema);
+      connectorId, "hive_table", std::move(subfieldFilters), remainingFilter, dataColumns);
 
   // Get assignments and out names.
   std::vector<std::string> outNames;

diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
@@ -17,8 +17,8 @@
 set -exu
 
 CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
-VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2026_03_08-iceberg
+VELOX_REPO=https://github.com/baibaichen/velox.git # NOTE(review): personal fork replacing IBM/velox - presumably temporary; confirm before merge
+VELOX_BRANCH=pr2/fix-parquet-thrift-spark18108 # NOTE(review): looks like a PR validation branch - confirm before merge
 VELOX_ENHANCED_BRANCH=ibm-2026_03_08
 VELOX_HOME=""
 RUN_SETUP_SCRIPT=ON

diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -449,6 +449,7 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("SPARK-35640: int as long should throw schema incompatible error")
     // Velox parquet reader not allow offset zero.
     .exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings")
+    .exclude("SPARK-34817: Read UINT_64 as Decimal from parquet")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
   enableSuite[GlutenParquetProtobufCompatibilitySuite]
@@ -463,6 +464,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV2QuerySuite]
     // Unsupport spark.sql.files.ignoreCorruptFiles.
     .exclude("Enabling/disabling ignoreCorruptFiles")
@@ -471,6 +473,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV1SchemaPruningSuite]
   enableSuite[GlutenParquetV2SchemaPruningSuite]
   enableSuite[GlutenParquetRebaseDatetimeV1Suite]

diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -453,6 +453,7 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("SPARK-35640: int as long should throw schema incompatible error")
     // Velox parquet reader not allow offset zero.
     .exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings")
+    .exclude("SPARK-34817: Read UINT_64 as Decimal from parquet")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
   enableSuite[GlutenParquetProtobufCompatibilitySuite]
@@ -468,6 +469,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV2QuerySuite]
     .exclude("row group skipping doesn't overflow when reading into larger type")
     // Unsupport spark.sql.files.ignoreCorruptFiles.
@@ -477,6 +479,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV1SchemaPruningSuite]
   enableSuite[GlutenParquetV2SchemaPruningSuite]
   enableSuite[GlutenParquetRebaseDatetimeV1Suite]

diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -396,6 +396,7 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("SPARK-35640: int as long should throw schema incompatible error")
     // Velox parquet reader not allow offset zero.
     .exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings")
+    .exclude("SPARK-34817: Read UINT_64 as Decimal from parquet")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
   enableSuite[GlutenParquetProtobufCompatibilitySuite]
@@ -411,6 +412,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV2QuerySuite]
     .exclude("row group skipping doesn't overflow when reading into larger type")
     // Unsupport spark.sql.files.ignoreCorruptFiles.
@@ -420,6 +422,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV1SchemaPruningSuite]
   enableSuite[GlutenParquetV2SchemaPruningSuite]
   enableSuite[GlutenParquetRebaseDatetimeV1Suite]

diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -363,11 +363,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)")
     .exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)")
     .exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)")
-    .exclude("unsupported parquet conversion LongType -> DateType")
     .exclude("unsupported parquet conversion LongType -> DecimalType(10,0)")
     .exclude("unsupported parquet conversion LongType -> DecimalType(19,0)")
     .exclude("unsupported parquet conversion LongType -> DecimalType(20,1)")
-    .exclude("unsupported parquet conversion LongType -> IntegerType")
     .exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)")
     .exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)")
     .exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)")
@@ -379,13 +377,15 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("parquet widening conversion IntegerType -> DecimalType(20,0)")
     .exclude("parquet widening conversion IntegerType -> DecimalType(38,0)")
     .exclude("parquet widening conversion IntegerType -> DoubleType")
+    .exclude("parquet widening conversion IntegerType -> ShortType")
     .exclude("parquet widening conversion LongType -> DecimalType(20,0)")
     .exclude("parquet widening conversion LongType -> DecimalType(21,1)")
     .exclude("parquet widening conversion LongType -> DecimalType(38,0)")
     .exclude("parquet widening conversion ShortType -> DecimalType(11,1)")
     .exclude("parquet widening conversion ShortType -> DecimalType(20,0)")
     .exclude("parquet widening conversion ShortType -> DecimalType(38,0)")
     .exclude("parquet widening conversion ShortType -> DoubleType")
+    .exclude("parquet decimal type change IntegerType -> ShortType overflows")
   enableSuite[GlutenParquetVariantShreddingSuite]
   // Generated suites for org.apache.spark.sql.execution.datasources.text
   // TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite]  // 1 failure
@@ -578,6 +578,7 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings")
     // TODO: fix in Spark-4.0
     .exclude("explode nested lists crossing a rowgroup boundary")
+    .exclude("SPARK-34817: Read UINT_64 as Decimal from parquet")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
   enableSuite[GlutenParquetProtobufCompatibilitySuite]
@@ -593,6 +594,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV2QuerySuite]
     .exclude("row group skipping doesn't overflow when reading into larger type")
     // Unsupport spark.sql.files.ignoreCorruptFiles.
@@ -602,6 +604,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV1SchemaPruningSuite]
   enableSuite[GlutenParquetV2SchemaPruningSuite]
   enableSuite[GlutenParquetRebaseDatetimeV1Suite]

diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -374,11 +374,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)")
     .exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)")
     .exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)")
-    .exclude("unsupported parquet conversion LongType -> DateType")
     .exclude("unsupported parquet conversion LongType -> DecimalType(10,0)")
     .exclude("unsupported parquet conversion LongType -> DecimalType(19,0)")
     .exclude("unsupported parquet conversion LongType -> DecimalType(20,1)")
-    .exclude("unsupported parquet conversion LongType -> IntegerType")
     .exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)")
     .exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)")
     .exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)")
@@ -390,13 +388,15 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("parquet widening conversion IntegerType -> DecimalType(20,0)")
     .exclude("parquet widening conversion IntegerType -> DecimalType(38,0)")
     .exclude("parquet widening conversion IntegerType -> DoubleType")
+    .exclude("parquet widening conversion IntegerType -> ShortType")
     .exclude("parquet widening conversion LongType -> DecimalType(20,0)")
     .exclude("parquet widening conversion LongType -> DecimalType(21,1)")
     .exclude("parquet widening conversion LongType -> DecimalType(38,0)")
     .exclude("parquet widening conversion ShortType -> DecimalType(11,1)")
     .exclude("parquet widening conversion ShortType -> DecimalType(20,0)")
     .exclude("parquet widening conversion ShortType -> DecimalType(38,0)")
     .exclude("parquet widening conversion ShortType -> DoubleType")
+    .exclude("parquet decimal type change IntegerType -> ShortType overflows")
   // TODO: 4.x enableSuite[GlutenParquetVariantShreddingSuite]  // 1 failure
   // Generated suites for org.apache.spark.sql.execution.datasources.text
   // TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite]  // 1 failure
@@ -539,6 +539,7 @@ class VeloxTestSettings extends BackendTestSettings {
     .excludeByPrefix("SPARK-53535") // see https://issues.apache.org/jira/browse/SPARK-53535
     .excludeByPrefix("vectorized reader: missing all struct fields")
     .excludeByPrefix("SPARK-54220") // https://issues.apache.org/jira/browse/SPARK-54220
+    .exclude("SPARK-34817: Read UINT_64 as Decimal from parquet")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
   enableSuite[GlutenParquetProtobufCompatibilitySuite]
@@ -554,6 +555,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV2QuerySuite]
     .exclude("row group skipping doesn't overflow when reading into larger type")
     // Unsupport spark.sql.files.ignoreCorruptFiles.
@@ -563,6 +565,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite because the filter after datasource is not needed.
     .exclude(
       "SPARK-26677: negated null-safe equality comparison should not filter matched row groups")
+    .exclude("SPARK-16632: read Parquet int32 as ByteType and ShortType")
   enableSuite[GlutenParquetV1SchemaPruningSuite]
   enableSuite[GlutenParquetV2SchemaPruningSuite]
   enableSuite[GlutenParquetRebaseDatetimeV1Suite]