From 4ea67fe75c2a47ef50ae82b0a31129cf7af4aef9 Mon Sep 17 00:00:00 2001 From: collerek Date: Thu, 10 Jun 2021 13:03:00 +0200 Subject: [PATCH 1/2] fix parsing with clauses, fix for #161 --- sql_metadata/parser.py | 14 +++------- test/test_with_statements.py | 52 ++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index 869720bd..6d4d5e74 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -537,19 +537,11 @@ def with_names(self) -> List[str]: with_names.append(prev_token.left_expanded) else: with_names.append(token.left_expanded) - # move to next with if exists, this with ends with - # ) + , if many withs or ) + SELECT if one - # need to move to next as AS can be in - # sub-queries inside with definition - while token.next_token and not ( - token.is_right_parenthesis - and ( - token.next_token.is_punctuation - or token.next_token.normalized in WITH_ENDING_KEYWORDS - ) - ): + # move to next with query end + while token.next_token and not (token.is_with_query_end): token = token.next_token if token.next_token.normalized in WITH_ENDING_KEYWORDS: + # end of with block self._is_in_with_block = False else: token = token.next_token diff --git a/test/test_with_statements.py b/test/test_with_statements.py index 64371196..6ad04c9f 100644 --- a/test/test_with_statements.py +++ b/test/test_with_statements.py @@ -90,3 +90,55 @@ def test_multiple_with_statements_with_with_columns(): assert parser.columns_aliases_names == ["c1", "c2", "c3", "c4"] assert parser.columns_aliases == {"c1": "*", "c2": "*", "c3": "c5", "c4": "c6"} assert parser.query_type == QueryType.SELECT + + +def test_complicated_with(): + query = """ + WITH uisd_filter_table as ( + select + session_id, + srch_id, + srch_ci, + srch_co, + srch_los, + srch_sort_type, + impr_list + from + uisd + where + datem <= date_sub(date_add(current_date(), 92), 7 * 52) + and lower(srch_sort_type) in ('expertpicks', 'recommended') + and srch_ci <= date_sub(date_add(current_date(), 92), 7 * 52) + and srch_co >= date_sub(date_add(current_date(), 1), 7 * 52) + ) + select + DISTINCT session_id, + srch_id, + srch_ci, + srch_co, + srch_los, + srch_sort_type, + l.impr_property_id as expe_property_id, + l.impr_position_across_pages + from + uisd_filter_table lateral view explode(impr_list) table as l + """ + parser = Parser(query) + assert parser.query_type == QueryType.SELECT + assert parser.with_names == ["uisd_filter_table"] + assert parser.tables == [ + "uisd", + "impr_list", + ] # this one is wrong too should be table + assert parser.columns == [ + "session_id", + "srch_id", + "srch_ci", + "srch_co", + "srch_los", + "srch_sort_type", + "impr_list", + "datem", + "l.impr_property_id", + "l.impr_position_across_pages", + ] From a2ed841c91d2972e887c069b6ea4ece47f3d8d6e Mon Sep 17 00:00:00 2001 From: collerek Date: Thu, 10 Jun 2021 13:05:26 +0200 Subject: [PATCH 2/2] fix lint --- sql_metadata/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index 6d4d5e74..548b9112 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -538,7 +538,7 @@ def with_names(self) -> List[str]: else: with_names.append(token.left_expanded) # move to next with query end - while token.next_token and not (token.is_with_query_end): + while token.next_token and not token.is_with_query_end: token = token.next_token if token.next_token.normalized in WITH_ENDING_KEYWORDS: # end of with block