From fafffc88f007e0899f3b683bea2e897edf3ce9a5 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 24 Jun 2014 14:56:36 -0400 Subject: [PATCH] [SPARK-2244] Fix hang introduced by SPARK-1466 The fix for SPARK-1466 (sha 38702487) opens a buffer for stderr, but does not drain it under normal operation. The result is an eventual hang during IPC. The fix here is to close stderr after it is no longer used. Related, but not addressed here: SPARK-1466 also removes stderr output from the console in the pyspark shell. That output should be reintroduced with a -verbose option. --- python/pyspark/java_gateway.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 19235d5f79f85..0a2fddebdb891 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -56,6 +56,10 @@ def preexec_func(): raise Exception("Launching GatewayServer failed with exit code %d: %s" % (error_code, "".join(proc.stderr.readlines()))) + # close stderr, otherwise it fills an internal buffer and causes SPARK-2244 + # TODO: work out a -verbose option and create EchoOutputThread(proc.stderr).start() instead + proc.stderr.close() + # Create a thread to echo output from the GatewayServer, which is required # for Java log output to show up: class EchoOutputThread(Thread):