diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
index fcb9513ab66f6..81dd24c1088b8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
@@ -183,7 +183,24 @@ private[sql] object ParquetRelation {
     // for the schema of a parquet data.
     val schema = StructType.fromAttributes(attributes).asNullable
     val newAttributes = schema.toAttributes
-    ParquetTypesConverter.writeMetaData(newAttributes, path, conf)
+    // Write the metadata file only when ParquetOutputFormat.ENABLE_JOB_SUMMARY is set
+    // (treated as true when absent). The nullable `newAttributes` are written so that the
+    // metadata agrees with the `output` of the relation returned below.
+    if (sqlContext.sparkContext.hadoopConfiguration
+      .getBoolean(ParquetOutputFormat.ENABLE_JOB_SUMMARY, true)) {
+      ParquetTypesConverter.writeMetaData(newAttributes, path, conf)
+    } else {
+      // Job summary is disabled: only make sure the directory exists, without the metafile
+      val fs = path.getFileSystem(conf)
+      if (fs == null) {
+        throw new IllegalArgumentException(
+          s"ParquetRelation: Path $path is incorrectly formatted")
+      }
+      if (!fs.exists(path) || !fs.getFileStatus(path).isDir) {
+        fs.mkdirs(path)
+      }
+    }
+
     new ParquetRelation(path.toString, Some(conf), sqlContext) {
       override val output = newAttributes
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
index 1c868da23e060..c851fcc8de62d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
@@ -652,6 +652,11 @@ private[parquet] object FileSystemHelper {
    */
   def findMaxTaskId(pathStr: String, conf: Configuration): Int = {
     val files = FileSystemHelper.listFiles(pathStr, conf)
+    // The listing can be empty, e.g. when "parquet.enable.summary-metadata" is false and
+    // no metadata file was written; there is no task id to find then, so return 0.
+    if (files.isEmpty) {
+      return 0
+    }
     // filename pattern is part-r-.parquet
     val nameP = new scala.util.matching.Regex("""part-r-(\d{1,}).parquet""", "taskid")
     val hiddenFileP = new scala.util.matching.Regex("_.*")