apache · waitinfuture · Dec 13, 2023 · Dec 7, 2023
diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/TestOperator.scala b/backends-velox/src/test/scala/io/glutenproject/execution/TestOperator.scala
@@ -17,9 +17,10 @@
 package io.glutenproject.execution
 
 import io.glutenproject.GlutenConfig
+import io.glutenproject.sql.shims.SparkShimLoader
 
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.{DataFrame, Row}
+import org.apache.spark.sql.{AnalysisException, DataFrame, Row}
 import org.apache.spark.sql.execution.{GenerateExec, RDDScanExec}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.functions.{avg, col, udf}
@@ -675,4 +676,22 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla
       }
     }
   }
+
+  test("Verify parquet field name with special character") {
+    withTable("t") {
+
+      // https://github.com/apache/spark/pull/35229 Spark remove parquet field name check after 3.2
+      if (!SparkShimLoader.getSparkVersion.startsWith("3.2")) {
+        sql("create table t using parquet as select sum(l_partkey) from lineitem")
+        runQueryAndCompare("select * from t") {
+          checkOperatorMatch[FileSourceScanExecTransformer]
+        }
+      } else {
+        val msg = intercept[AnalysisException] {
+          sql("create table t using parquet as select sum(l_partkey) from lineitem")
+        }.message
+        assert(msg.contains("contains invalid character"))
+      }
+    }
+  }
 }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -50,27 +50,6 @@ static const std::unordered_set<std::string> kBlackList = {
     "arrays_overlap",
     "approx_percentile"};
 
-bool validateColNames(const ::substrait::NamedStruct& schema) {
-  // Keeps the same logic with 'Tokenizer::isUnquotedPathCharacter' at
-  // https://github.com/oap-project/velox/blob/update/velox/type/Tokenizer.cpp#L154-L157.
-  auto isUnquotedPathCharacter = [](char c) {
-    return c == ':' || c == '$' || c == '-' || c == '/' || c == '@' || c == '|' || c == '#' || c == '.' || c == '-' ||
-        c == '_' || isalnum(c);
-  };
-
-  for (const auto& name : schema.names()) {
-    for (auto i = 0; i < name.size(); i++) {
-      auto c = name[i];
-      if (!isUnquotedPathCharacter(c)) {
-        std::cout << "native validation failed due to: Illegal column charactor " << c << "in column " << name
-                  << std::endl;
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 } // namespace
 
 bool SubstraitToVeloxPlanValidator::validateInputTypes(
@@ -1160,13 +1139,6 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::ReadRel& readRel
     }
   }
 
-  if (readRel.has_base_schema()) {
-    const auto& baseSchema = readRel.base_schema();
-    if (!validateColNames(baseSchema)) {
-      logValidateMsg("native validation failed due to: Validation failed for column name contains illegal charactor.");
-      return false;
-    }
-  }
   return true;
 }
 

diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
@@ -123,7 +123,7 @@ object ConverterUtils extends Logging {
   // TODO: This is used only by `BasicScanExecTransformer`,
   //  perhaps we can remove this in the future and use `withExprId` version consistently.
   def collectAttributeNamesWithoutExprId(attributes: Seq[Attribute]): JList[String] = {
-    collectAttributeNamesDFS(attributes)(genColumnNameWithoutExprId)
+    collectAttributeNamesDFS(attributes)(attr => normalizeColName(attr.name))
   }
 
   private def collectAttributeNamesDFS(attributes: Seq[Attribute])(