From c4d1ec9f56eab81e3d84f3d0c4291b3652a20d07 Mon Sep 17 00:00:00 2001 From: Byunk Date: Mon, 16 Oct 2023 22:25:57 +0900 Subject: [PATCH 1/6] fix #394 and add test --- sql_metadata/generalizator.py | 3 ++- sql_metadata/parser.py | 2 +- test/test_comments.py | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql_metadata/generalizator.py b/sql_metadata/generalizator.py index 5cdb0937..3e993069 100644 --- a/sql_metadata/generalizator.py +++ b/sql_metadata/generalizator.py @@ -47,7 +47,8 @@ def without_comments(self) -> str: :rtype: str """ sql = sqlparse.format(self._raw_query, strip_comments=True) - sql = re.sub(r"\s{2,}", " ", sql) + sql = sql.replace('\n', ' ') + sql = re.sub(r"[ \t]+", " ", sql) return sql @property diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index 96371100..dd3eddeb 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -622,7 +622,7 @@ def without_comments(self) -> str: """ Removes comments from SQL query """ - return Generalizator(self.query).without_comments + return Generalizator(self._raw_query).without_comments @property def generalize(self) -> str: diff --git a/test/test_comments.py b/test/test_comments.py index b41380f8..7406517d 100644 --- a/test/test_comments.py +++ b/test/test_comments.py @@ -205,3 +205,11 @@ def test_next_token_not_comment_on_non_comments(): select_tok.next_token.next_token == select_tok.next_token_not_comment.next_token_not_comment ) + + +def test_without_comments_for_multiline_query(): + query = """SELECT * -- comment + FROM table + WHERE table.id = '123'""" + parser = Parser(query) + assert parser.without_comments == """SELECT * FROM table WHERE table.id = '123'""" \ No newline at end of file From 70a56599a137945347c3dde82bd991cf6e9d3f65 Mon Sep 17 00:00:00 2001 From: Byunk Date: Mon, 16 Oct 2023 22:55:35 +0900 Subject: [PATCH 2/6] fix #390 and add test --- sql_metadata/parser.py | 2 +- sql_metadata/token.py | 9 +++++++++ test/test_getting_columns.py | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index dd3eddeb..da3cbaa4 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -865,7 +865,7 @@ def _determine_opening_parenthesis_type(self, token: SQLToken): # we are in columns and in a column subquery definition token.is_column_definition_start = True elif ( - token.previous_token.is_as_keyword + token.previous_token_not_comment.is_as_keyword and token.last_keyword_normalized != "WINDOW" ): # window clause also contains AS keyword, but it is not a query diff --git a/sql_metadata/token.py b/sql_metadata/token.py index 8a556eab..694d93ce 100644 --- a/sql_metadata/token.py +++ b/sql_metadata/token.py @@ -369,6 +369,15 @@ def next_token_not_comment(self): return self.next_token.next_token_not_comment return self.next_token + @property + def previous_token_not_comment(self): + """ + Property returning previous non-comment token + """ + if self.previous_token and self.previous_token.is_comment: + return self.previous_token.previous_token_not_comment + return self.previous_token + def is_constraint_definition_inside_create_table_clause( self, query_type: str ) -> bool: diff --git a/test/test_getting_columns.py b/test/test_getting_columns.py index 09a02197..68fb81ac 100644 --- a/test/test_getting_columns.py +++ b/test/test_getting_columns.py @@ -264,6 +264,13 @@ def test_columns_with_comments(): "order_by": ["cl_sortkey"], } + parser = Parser( + """WITH aa AS --sdfsdfsdf + (SELECT C1, C2 FROM T1) + SELECT C1, C2 FROM aa""" + ) + assert parser.columns == ["C1", "C2"] + assert parser.columns_dict == {"select": ["C1", "C2"]} def test_columns_with_keyword_aliases(): parser = Parser( From 9ff864311dec74b0b676dcf11d0b6bc839d834de Mon Sep 17 00:00:00 2001 From: Byunk Date: Mon, 16 Oct 2023 23:15:05 +0900 Subject: [PATCH 3/6] fix #377 and add test --- sql_metadata/parser.py | 10 ++++++++-- test/test_getting_columns.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index da3cbaa4..b0515961 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -533,12 +533,18 @@ def subqueries(self) -> Dict: ): current_subquery.append(inner_token) inner_token = inner_token.next_token + + query_name = None if inner_token.next_token.value in self.subqueries_names: query_name = inner_token.next_token.value - else: + elif inner_token.next_token.is_as_keyword: query_name = inner_token.next_token.next_token.value + elif inner_token.next_token.is_name: + query_name = inner_token.next_token.value + subquery_text = "".join([x.stringified_token for x in current_subquery]) - subqueries[query_name] = subquery_text + if query_name is not None: + subqueries[query_name] = subquery_text token = token.next_token diff --git a/test/test_getting_columns.py b/test/test_getting_columns.py index 68fb81ac..e0b7380a 100644 --- a/test/test_getting_columns.py +++ b/test/test_getting_columns.py @@ -484,3 +484,36 @@ def test_having_columns(): "group_by": ["Country"], "having": ["CustomerID"], } + +def test_nested_queries(): + query = """ + SELECT max(dt) FROM + ( + SELECT max(dt) as dt FROM t + UNION ALL + SELECT max(dt) as dt FROM t2 + ) + """ + parser = Parser(query) + assert parser.columns == ["dt"] + assert parser.columns_dict == {"select": ["dt"]} + + query = """ + SELECT max(dt) FROM + ( + SELECT max(dt) as dt FROM t + ) + """ + parser = Parser(query) + assert parser.columns == ["dt"] + assert parser.columns_dict == {"select": ["dt"]} + + query = """ + SELECT max(dt) FROM + ( + SELECT dt FROM t + ) + """ + parser = Parser(query) + assert parser.columns == ["dt"] + assert parser.columns_dict == {"select": ["dt"]} From 73d4b4f478ccb2d8785e7ca6d5ab6199b07efab1 Mon Sep 17 00:00:00 2001 From: Byunk Date: Tue, 17 Oct 2023 00:47:27 +0900 Subject: [PATCH 4/6] fix 'NoneType' object has no attribute 'normalized' --- sql_metadata/token.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql_metadata/token.py b/sql_metadata/token.py index 694d93ce..544f709a 100644 --- a/sql_metadata/token.py +++ b/sql_metadata/token.py @@ -228,6 +228,10 @@ def is_alias_of_self(self) -> bool: end_of_column = end_of_column.find_nearest_token( [",", "FROM"], value_attribute="normalized", direction="right" ) + + if end_of_column is EmptyToken: + return False + return end_of_column.previous_token.normalized == self.normalized @property From 9607b8f849a000f1773c6e9f05a2b78725296371 Mon Sep 17 00:00:00 2001 From: Byunk Date: Tue, 17 Oct 2023 00:49:00 +0900 Subject: [PATCH 5/6] formatted by black --- sql_metadata/generalizator.py | 2 +- sql_metadata/token.py | 2 +- test/test_comments.py | 2 +- test/test_getting_columns.py | 2 ++ 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql_metadata/generalizator.py b/sql_metadata/generalizator.py index 3e993069..8f0678c7 100644 --- a/sql_metadata/generalizator.py +++ b/sql_metadata/generalizator.py @@ -47,7 +47,7 @@ def without_comments(self) -> str: :rtype: str """ sql = sqlparse.format(self._raw_query, strip_comments=True) - sql = sql.replace('\n', ' ') + sql = sql.replace("\n", " ") sql = re.sub(r"[ \t]+", " ", sql) return sql diff --git a/sql_metadata/token.py b/sql_metadata/token.py index 544f709a..16a3c30b 100644 --- a/sql_metadata/token.py +++ b/sql_metadata/token.py @@ -228,7 +228,7 @@ def is_alias_of_self(self) -> bool: end_of_column = end_of_column.find_nearest_token( [",", "FROM"], value_attribute="normalized", direction="right" ) - + if end_of_column is EmptyToken: return False diff --git a/test/test_comments.py b/test/test_comments.py index 7406517d..9a93bb5a 100644 --- a/test/test_comments.py +++ b/test/test_comments.py @@ -212,4 +212,4 @@ def test_without_comments_for_multiline_query(): FROM table WHERE table.id = '123'""" parser = Parser(query) - assert parser.without_comments == """SELECT * FROM table WHERE table.id = '123'""" \ No newline at end of file + assert parser.without_comments == """SELECT * FROM table WHERE table.id = '123'""" diff --git a/test/test_getting_columns.py b/test/test_getting_columns.py index e0b7380a..8f6c20b5 100644 --- a/test/test_getting_columns.py +++ b/test/test_getting_columns.py @@ -272,6 +272,7 @@ def test_columns_with_comments(): assert parser.columns == ["C1", "C2"] assert parser.columns_dict == {"select": ["C1", "C2"]} + def test_columns_with_keyword_aliases(): parser = Parser( "SELECT date_format(time_id,'%Y-%m-%d') AS date, pageviews AS cnt " @@ -485,6 +486,7 @@ def test_having_columns(): "having": ["CustomerID"], } + def test_nested_queries(): query = """ SELECT max(dt) FROM From ce38e4392027fa8a816397f4c63a872ad7504847 Mon Sep 17 00:00:00 2001 From: Byunk Date: Tue, 17 Oct 2023 22:37:55 +0900 Subject: [PATCH 6/6] remove unnecessaries --- sql_metadata/parser.py | 2 -- sql_metadata/token.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index b0515961..a3d3a289 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -539,8 +539,6 @@ def subqueries(self) -> Dict: query_name = inner_token.next_token.value elif inner_token.next_token.is_as_keyword: query_name = inner_token.next_token.next_token.value - elif inner_token.next_token.is_name: - query_name = inner_token.next_token.value subquery_text = "".join([x.stringified_token for x in current_subquery]) if query_name is not None: diff --git a/sql_metadata/token.py b/sql_metadata/token.py index 16a3c30b..694d93ce 100644 --- a/sql_metadata/token.py +++ b/sql_metadata/token.py @@ -228,10 +228,6 @@ def is_alias_of_self(self) -> bool: end_of_column = end_of_column.find_nearest_token( [",", "FROM"], value_attribute="normalized", direction="right" ) - - if end_of_column is EmptyToken: - return False - return end_of_column.previous_token.normalized == self.normalized @property