From 8ea871f8130b2490f1bad7374a819bf56f0ccbbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Mon, 20 Oct 2014 11:58:56 +0200
Subject: [PATCH 1/7] Ensure that files are fetched atomically

tempFile is created in the same directory as targetFile, so that the
move from tempFile to targetFile is always atomic
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 53a7512edd852..8808858b83ea5 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -335,8 +335,7 @@ private[spark] object Utils extends Logging {
   def fetchFile(url: String, targetDir: File, conf: SparkConf, securityMgr: SecurityManager,
     hadoopConf: Configuration) {
     val filename = url.split("/").last
-    val tempDir = getLocalDir(conf)
-    val tempFile =  File.createTempFile("fetchFileTemp", null, new File(tempDir))
+    val tempFile =  File.createTempFile("fetchFileTemp", null, new File(targetDir.getAbsolutePath))
     val targetFile = new File(targetDir, filename)
     val uri = new URI(url)
     val fileOverwrite = conf.getBoolean("spark.files.overwrite", defaultValue = false)

From c6a5590c9866366a70dd696ff7b2bffb185201d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Mon, 3 Nov 2014 14:25:05 +0100
Subject: [PATCH 2/7] Revert commit 8ea871f8130b2490f1bad7374a819bf56f0ccbbd

---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index e226abf8d80b6..07c4ae000e406 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -356,7 +356,8 @@ private[spark] object Utils extends Logging {
   def fetchFile(url: String, targetDir: File, conf: SparkConf, securityMgr: SecurityManager,
     hadoopConf: Configuration) {
     val filename = url.split("/").last
-    val tempFile =  File.createTempFile("fetchFileTemp", null, new File(targetDir.getAbsolutePath))
+    val tempDir = getLocalDir(conf)
+    val tempFile = File.createTempFile("fetchFileTemp", null, new File(tempDir))
     val targetFile = new File(targetDir, filename)
     val uri = new URI(url)
     val fileOverwrite = conf.getBoolean("spark.files.overwrite", defaultValue = false)

From 9ba89caafc57e8d8b94f7b98e39c4613f0f5158d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Mon, 3 Nov 2014 14:37:46 +0100
Subject: [PATCH 3/7] Ensure that files are fetched atomically

tempFile is created in the same directory as targetFile, so that the
move from tempFile to targetFile is always atomic
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 2771304593ff2..134cd56269507 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -425,8 +425,7 @@ private[spark] object Utils extends Logging {
       conf: SparkConf,
       securityMgr: SecurityManager,
       hadoopConf: Configuration) {
-    val tempDir = getLocalDir(conf)
-    val tempFile = File.createTempFile("fetchFileTemp", null, new File(tempDir))
+    val tempFile = File.createTempFile("fetchFileTemp", null, new File(targetDir.getAbsolutePath))
     val targetFile = new File(targetDir, filename)
     val uri = new URI(url)
     val fileOverwrite = conf.getBoolean("spark.files.overwrite", defaultValue = false)

From 876ae5e18492d20a38c1032547c3357394c7e7f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Tue, 24 Mar 2015 10:52:13 +0100
Subject: [PATCH 4/7] Clarify the local directories usage in YARN

---
 docs/running-on-yarn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 68b1aeb8ebd01..744d989cde0d4 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -274,6 +274,6 @@ If you need a reference to the proper location to put log files in the YARN so t
 # Important notes
 
 - Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
-- The local directories used by Spark executors will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored.
+- In yarn-cluster mode, the local directories used by the Spark executors and the Spark driver will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored. In yarn-client mode, the Spark executors will use the local directories configured for YARN while the Spark driver will use those defined in `spark.local.dir` (this is because the Spark driver does not run on the YARN cluster in yarn-client mode, only the Spark executors do).
 - The `--files` and `--archives` options support specifying file names with the # similar to Hadoop. For example you can specify: `--files localtest.txt#appSees.txt` and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name `appSees.txt`, and your application should use the name as `appSees.txt` to reference it when running on YARN.
 - The `--jars` option allows the `SparkContext.addJar` function to work if you are using it with local files and running in `yarn-cluster` mode. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.

From 436fb7d3f154aea7a184d9a5294d1b3fc91fe057 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Tue, 24 Mar 2015 11:01:34 +0100
Subject: [PATCH 5/7] Revert "Clarify the local directories usage in YARN"

This reverts commit 876ae5e18492d20a38c1032547c3357394c7e7f7.
---
 docs/running-on-yarn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 744d989cde0d4..68b1aeb8ebd01 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -274,6 +274,6 @@ If you need a reference to the proper location to put log files in the YARN so t
 # Important notes
 
 - Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
-- In yarn-cluster mode, the local directories used by the Spark executors and the Spark driver will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored. In yarn-client mode, the Spark executors will use the local directories configured for YARN while the Spark driver will use those defined in `spark.local.dir` (this is because the Spark driver does not run on the YARN cluster in yarn-client mode, only the Spark executors do).
+- The local directories used by Spark executors will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored.
 - The `--files` and `--archives` options support specifying file names with the # similar to Hadoop. For example you can specify: `--files localtest.txt#appSees.txt` and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name `appSees.txt`, and your application should use the name as `appSees.txt` to reference it when running on YARN.
 - The `--jars` option allows the `SparkContext.addJar` function to work if you are using it with local files and running in `yarn-cluster` mode. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.

From eaaf5191ca9e93b8504a28c4f18ab71a529db811 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Tue, 24 Mar 2015 11:06:40 +0100
Subject: [PATCH 6/7] Clarify the local directories usage in YARN

---
 docs/running-on-yarn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 68b1aeb8ebd01..744d989cde0d4 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -274,6 +274,6 @@ If you need a reference to the proper location to put log files in the YARN so t
 # Important notes
 
 - Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
-- The local directories used by Spark executors will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored.
+- In yarn-cluster mode, the local directories used by the Spark executors and the Spark driver will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored. In yarn-client mode, the Spark executors will use the local directories configured for YARN while the Spark driver will use those defined in `spark.local.dir` (this is because the Spark driver does not run on the YARN cluster in yarn-client mode, only the Spark executors do).
 - The `--files` and `--archives` options support specifying file names with the # similar to Hadoop. For example you can specify: `--files localtest.txt#appSees.txt` and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name `appSees.txt`, and your application should use the name as `appSees.txt` to reference it when running on YARN.
 - The `--jars` option allows the `SparkContext.addJar` function to work if you are using it with local files and running in `yarn-cluster` mode. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.

From 6912b90d57710e7f49c8674e141100fb5ec05e33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christophe=20Pr=C3=A9aud?= <christophe.preaud@kelkoo.com>
Date: Tue, 24 Mar 2015 12:07:12 +0100
Subject: [PATCH 7/7] Fix some formatting issues.

---
 docs/running-on-yarn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 744d989cde0d4..d9f3eb2b74b18 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -274,6 +274,6 @@ If you need a reference to the proper location to put log files in the YARN so t
 # Important notes
 
 - Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
-- In yarn-cluster mode, the local directories used by the Spark executors and the Spark driver will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored. In yarn-client mode, the Spark executors will use the local directories configured for YARN while the Spark driver will use those defined in `spark.local.dir` (this is because the Spark driver does not run on the YARN cluster in yarn-client mode, only the Spark executors do).
+- In `yarn-cluster` mode, the local directories used by the Spark executors and the Spark driver will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored. In `yarn-client` mode, the Spark executors will use the local directories configured for YARN while the Spark driver will use those defined in `spark.local.dir`. This is because the Spark driver does not run on the YARN cluster in `yarn-client` mode, only the Spark executors do.
 - The `--files` and `--archives` options support specifying file names with the # similar to Hadoop. For example you can specify: `--files localtest.txt#appSees.txt` and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name `appSees.txt`, and your application should use the name as `appSees.txt` to reference it when running on YARN.
 - The `--jars` option allows the `SparkContext.addJar` function to work if you are using it with local files and running in `yarn-cluster` mode. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.