From 654c5121fd26a85036787882d3d2c3b56360b686 Mon Sep 17 00:00:00 2001 From: uncleGen Date: Tue, 21 Mar 2017 15:49:11 +0800 Subject: [PATCH 1/2] bug fix: window operator miss the `watermark` metadata of time column --- python/pyspark/sql/functions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 376b86ea69bd4..cdddd4ba1d00f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1163,7 +1163,10 @@ def check_string_field(field, fieldName): raise TypeError("%s should be provided as a string" % fieldName) sc = SparkContext._active_spark_context - time_col = _to_java_column(timeColumn) + if isinstance(timeColumn, Column): + raise TypeError("timeColumn should be the name of time column, and provided as a string.") + else: + time_col = _to_java_column(timeColumn) check_string_field(windowDuration, "windowDuration") if slideDuration and startTime: check_string_field(slideDuration, "slideDuration") From 890c6e6416417febf8da9960633bb66cda5201c7 Mon Sep 17 00:00:00 2001 From: uncleGen Date: Tue, 21 Mar 2017 16:58:11 +0800 Subject: [PATCH 2/2] avoid to break api --- python/pyspark/sql/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index cdddd4ba1d00f..4d74aa33057aa 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1164,7 +1164,7 @@ def check_string_field(field, fieldName): sc = SparkContext._active_spark_context if isinstance(timeColumn, Column): - raise TypeError("timeColumn should be the name of time column, and provided as a string.") + time_col = _to_java_column(timeColumn._jc.toString().encode('utf8')) else: time_col = _to_java_column(timeColumn) check_string_field(windowDuration, "windowDuration")