From 36ea45b277aadc3e06b11c3713af5a19e9e7b32c Mon Sep 17 00:00:00 2001 From: Hafizur Rahman Date: Sat, 10 Sep 2016 01:51:26 +0900 Subject: [PATCH 1/4] Allow matplotlib plot display in pyspark context --- .../main/resources/python/zeppelin_pyspark.py | 49 ++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/spark/src/main/resources/python/zeppelin_pyspark.py b/spark/src/main/resources/python/zeppelin_pyspark.py index 3e6535fa4f9..e3a571a29a6 100644 --- a/spark/src/main/resources/python/zeppelin_pyspark.py +++ b/spark/src/main/resources/python/zeppelin_pyspark.py @@ -30,6 +30,13 @@ import ast import traceback +import base64 +from io import BytesIO +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + # for back compatibility from pyspark.sql import SQLContext, HiveContext, Row @@ -51,12 +58,42 @@ class PyZeppelinContext(dict): def __init__(self, zc): self.z = zc - def show(self, obj): - from pyspark.sql import DataFrame - if isinstance(obj, DataFrame): - print(gateway.jvm.org.apache.zeppelin.spark.ZeppelinContext.showDF(self.z, obj._jdf)) - else: - print(str(obj)) + + def show(self, p, **kwargs): + from pyspark.sql import DataFrame + + if hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot": + self.show_matplotlib(p, **kwargs) + elif isinstance(p, DataFrame): + print(gateway.jvm.org.apache.zeppelin.spark.ZeppelinContext.showDF(self.z, obj._jdf)) + elif hasattr(p, '__call__'): + p() #error reporting + else: + print(str(obj)) + + def show_matplotlib(self, p, fmt="png", width="auto", height="auto", + **kwargs): + """Matplotlib show function + """ + if fmt == "png": + img = BytesIO() + p.savefig(img, format=fmt) + img_str = b"data:image/png;base64," + img_str += base64.b64encode(img.getvalue().strip()) + img_tag = "" + # Decoding is necessary for Python 3 compability + img_str = img_str.decode("ascii") + img_str = img_tag.format(img=img_str, width=width, height=height) + elif fmt == "svg": + img = StringIO() + p.savefig(img, format=fmt) + img_str = img.getvalue() + else: + raise ValueError("fmt must be 'png' or 'svg'") + + html = "%html
{img}
" + print(html.format(width=width, height=height, img=img_str)) + img.close() # By implementing special methods it makes operating on it more Pythonic def __setitem__(self, key, item): From 5f794a51f2973223e7e65dc4f0a81facabc8305e Mon Sep 17 00:00:00 2001 From: Hafizur Rahman Date: Wed, 14 Sep 2016 20:59:01 +0900 Subject: [PATCH 2/4] Allow matplotlib plot display in pyspark context Fix formatting --- .../main/resources/python/zeppelin_pyspark.py | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/spark/src/main/resources/python/zeppelin_pyspark.py b/spark/src/main/resources/python/zeppelin_pyspark.py index e3a571a29a6..855eeb149f5 100644 --- a/spark/src/main/resources/python/zeppelin_pyspark.py +++ b/spark/src/main/resources/python/zeppelin_pyspark.py @@ -33,9 +33,9 @@ import base64 from io import BytesIO try: - from StringIO import StringIO + from StringIO import StringIO except ImportError: - from io import StringIO + from io import StringIO # for back compatibility from pyspark.sql import SQLContext, HiveContext, Row @@ -60,40 +60,40 @@ def __init__(self, zc): def show(self, p, **kwargs): - from pyspark.sql import DataFrame - - if hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot": - self.show_matplotlib(p, **kwargs) - elif isinstance(p, DataFrame): - print(gateway.jvm.org.apache.zeppelin.spark.ZeppelinContext.showDF(self.z, obj._jdf)) - elif hasattr(p, '__call__'): - p() #error reporting - else: - print(str(obj)) + from pyspark.sql import DataFrame + + if isinstance(p, DataFrame): + print(gateway.jvm.org.apache.zeppelin.spark.ZeppelinContext.showDF(self.z, obj._jdf)) + elif hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot": + self.show_matplotlib(p, **kwargs) + elif hasattr(p, '__call__'): + p() #error reporting + else: + print(str(obj)) def show_matplotlib(self, p, fmt="png", width="auto", height="auto", **kwargs): - """Matplotlib show function - """ - if fmt == "png": - img = BytesIO() - p.savefig(img, format=fmt) - img_str = b"data:image/png;base64," - img_str += base64.b64encode(img.getvalue().strip()) - img_tag = "" - # Decoding is necessary for Python 3 compability - img_str = img_str.decode("ascii") - img_str = img_tag.format(img=img_str, width=width, height=height) - elif fmt == "svg": - img = StringIO() - p.savefig(img, format=fmt) - img_str = img.getvalue() - else: - raise ValueError("fmt must be 'png' or 'svg'") - - html = "%html
{img}
" - print(html.format(width=width, height=height, img=img_str)) - img.close() + """Matplotlib show function + """ + if fmt == "png": + img = BytesIO() + p.savefig(img, format=fmt) + img_str = b"data:image/png;base64," + img_str += base64.b64encode(img.getvalue().strip()) + img_tag = "" + # Decoding is necessary for Python 3 compability + img_str = img_str.decode("ascii") + img_str = img_tag.format(img=img_str, width=width, height=height) + elif fmt == "svg": + img = StringIO() + p.savefig(img, format=fmt) + img_str = img.getvalue() + else: + raise ValueError("fmt must be 'png' or 'svg'") + + html = "%html
{img}
" + print(html.format(width=width, height=height, img=img_str)) + img.close() # By implementing special methods it makes operating on it more Pythonic def __setitem__(self, key, item): From e83ffae8b49969ba6df1ea154d666f8d17dcf07a Mon Sep 17 00:00:00 2001 From: Hafizur Rahman Date: Wed, 14 Sep 2016 21:22:27 +0900 Subject: [PATCH 3/4] Allow matplotlib plot display in pyspark context Fix variable name --- spark/src/main/resources/python/zeppelin_pyspark.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spark/src/main/resources/python/zeppelin_pyspark.py b/spark/src/main/resources/python/zeppelin_pyspark.py index 855eeb149f5..3f843f2d0ce 100644 --- a/spark/src/main/resources/python/zeppelin_pyspark.py +++ b/spark/src/main/resources/python/zeppelin_pyspark.py @@ -59,15 +59,15 @@ def __init__(self, zc): self.z = zc - def show(self, p, **kwargs): + def show(self, obj, **kwargs): from pyspark.sql import DataFrame - if isinstance(p, DataFrame): + if isinstance(obj, DataFrame): print(gateway.jvm.org.apache.zeppelin.spark.ZeppelinContext.showDF(self.z, obj._jdf)) - elif hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot": - self.show_matplotlib(p, **kwargs) - elif hasattr(p, '__call__'): - p() #error reporting + elif hasattr(obj, '__name__') and obj.__name__ == "matplotlib.pyplot": + self.show_matplotlib(obj, **kwargs) + elif hasattr(obj, '__call__'): + obj() #error reporting else: print(str(obj)) From 3151ff0fb4c3ca8a0177d2bfdedb0db18db39dd5 Mon Sep 17 00:00:00 2001 From: Hafizur Rahman Date: Wed, 14 Sep 2016 22:06:56 +0900 Subject: [PATCH 4/4] Allow matplotlib plot display in pyspark context Fix indenting --- spark/src/main/resources/python/zeppelin_pyspark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/main/resources/python/zeppelin_pyspark.py b/spark/src/main/resources/python/zeppelin_pyspark.py index 3f843f2d0ce..e40f928e6ba 100644 --- a/spark/src/main/resources/python/zeppelin_pyspark.py +++ b/spark/src/main/resources/python/zeppelin_pyspark.py @@ -77,7 +77,7 @@ def show_matplotlib(self, p, fmt="png", width="auto", height="auto", """ if fmt == "png": img = BytesIO() - p.savefig(img, format=fmt) + p.savefig(img, format=fmt) img_str = b"data:image/png;base64," img_str += base64.b64encode(img.getvalue().strip()) img_tag = ""