From 752b9d776d6c6587cdb7fb05e3c5c16ab616046a Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 28 May 2014 00:20:58 -0700 Subject: [PATCH] FileLogger: Fix compile against Hadoop trunk In Hadoop trunk (currently Hadoop 3.0.0), the deprecated FSDataOutputStream#sync() method has been removed. Instead, the FSDataOutputStream#hflush method fills the same role. We should call hflush if it is available. This patch uses reflection to maintain support for old versions of Hadoop that do not have hflush, but which do have the deprecated sync method. --- .../scala/org/apache/spark/util/FileLogger.scala | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala b/core/src/main/scala/org/apache/spark/util/FileLogger.scala index 0e6d21b22023a..6a95dc06e155d 100644 --- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala +++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala @@ -61,6 +61,14 @@ private[spark] class FileLogger( // Only defined if the file system scheme is not local private var hadoopDataStream: Option[FSDataOutputStream] = None + // The Hadoop APIs have changed over time, so we use reflection to figure out + // the correct method to use to flush a hadoop data stream. See SPARK-1518 + // for details. + private val hadoopFlushMethod = { + val cls = classOf[FSDataOutputStream] + scala.util.Try(cls.getMethod("hflush")).getOrElse(cls.getMethod("sync")) + } + private var writer: Option[PrintWriter] = None /** @@ -149,13 +157,13 @@ private[spark] class FileLogger( /** * Flush the writer to disk manually. * - * If the Hadoop FileSystem is used, the underlying FSDataOutputStream (r1.0.4) must be - * sync()'ed manually as it does not support flush(), which is invoked by when higher - * level streams are flushed. + * When using a Hadoop filesystem, we need to invoke the hflush or sync + * method. In HDFS, hflush guarantees that the data gets to all the + * DataNodes. */ def flush() { writer.foreach(_.flush()) - hadoopDataStream.foreach(_.sync()) + hadoopDataStream.foreach(hadoopFlushMethod.invoke(_)) } /**