From 752b9d776d6c6587cdb7fb05e3c5c16ab616046a Mon Sep 17 00:00:00 2001
From: Colin McCabe <cmccabe@cloudera.com>
Date: Wed, 28 May 2014 00:20:58 -0700
Subject: [PATCH] FileLogger: Fix compile against Hadoop trunk

In Hadoop trunk (currently Hadoop 3.0.0), the deprecated
FSDataOutputStream#sync() method has been removed.  Instead, the
FSDataOutputStream#hflush method fills the same role.  We should call
hflush if it is available.  This patch uses reflection to maintain
support for old versions of Hadoop that do not have hflush, but which do
have the deprecated sync method.
---
 .../scala/org/apache/spark/util/FileLogger.scala | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
index 0e6d21b22023a..6a95dc06e155d 100644
--- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala
+++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
@@ -61,6 +61,14 @@ private[spark] class FileLogger(
   // Only defined if the file system scheme is not local
   private var hadoopDataStream: Option[FSDataOutputStream] = None
 
+  // The Hadoop APIs have changed over time, so we use reflection to figure out
+  // the correct method to use to flush a hadoop data stream. See SPARK-1518
+  // for details.
+  private val hadoopFlushMethod = {
+    val cls = classOf[FSDataOutputStream]
+    scala.util.Try(cls.getMethod("hflush")).getOrElse(cls.getMethod("sync"))
+  }
+
   private var writer: Option[PrintWriter] = None
 
   /**
@@ -149,13 +157,13 @@ private[spark] class FileLogger(
   /**
    * Flush the writer to disk manually.
    *
-   * If the Hadoop FileSystem is used, the underlying FSDataOutputStream (r1.0.4) must be
-   * sync()'ed manually as it does not support flush(), which is invoked by when higher
-   * level streams are flushed.
+   * When using a Hadoop filesystem, we need to invoke the hflush or sync
+   * method. In HDFS, hflush guarantees that the data gets to all the
+   * DataNodes.
    */
   def flush() {
     writer.foreach(_.flush())
-    hadoopDataStream.foreach(_.sync())
+    hadoopDataStream.foreach(hadoopFlushMethod.invoke(_))
   }
 
   /**