Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8fc0aaa
first version passing all tests
collerek Apr 22, 2021
e5f3d9d
simplified update_table part1
collerek Apr 22, 2021
79d678d
fix comments, add values, add values_dict, add columns_dict
collerek Apr 25, 2021
93179bb
update readme
collerek Apr 25, 2021
88efae8
add subqueries names support
collerek Apr 26, 2021
84dac31
add extracting subqueries
collerek Apr 27, 2021
01bafa2
add test from #120
collerek Apr 27, 2021
273cf99
Update test/test_getting_tables.py
collerek Apr 29, 2021
262e843
Update sql_metadata/generalizator.py
collerek Apr 29, 2021
55cac5f
Update sql_metadata/keywords_lists.py
collerek Apr 29, 2021
dbe37f4
remove query setter, add check for it, remove sqlparse line in comple…
collerek Apr 29, 2021
f4bf00e
Merge branch 'master' into new_parser
macbre Apr 29, 2021
8046c9e
copy poetry.lock from master, adjust path in pyproject.toml, switch r…
collerek Apr 29, 2021
3f87cd7
Merge branch 'new_parser' of https://github.com/collerek/sql-metadata…
collerek Apr 29, 2021
ea02fa1
generalizator.py - rename "remove_comments" to "without_comments"
macbre Apr 29, 2021
6778cce
Update sql_metadata/generalizator.py
collerek Apr 29, 2021
1bb994d
change remove_comments -> without comments in parser, generalizator, …
collerek Apr 29, 2021
3dbf9ad
remove optional from init in parser
collerek Apr 29, 2021
0bcc27f
Merge branch 'master' into new_parser
macbre Apr 29, 2021
4aba9d5
Update test/test_normalization.py
collerek Apr 30, 2021
80830d4
Update sql_metadata/parser.py
collerek Apr 30, 2021
c36fc46
revert to not removing comment in preprocess
collerek Apr 30, 2021
6ffd308
Update test/test_query.py
macbre Apr 30, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
run: poetry run pytest -vv --cov=sql_metadata --cov-report=term

- name: Lint with pylint
run: poetry run pylint sql_metadata.py
run: poetry run pylint sql_metadata
Comment thread
collerek marked this conversation as resolved.

- name: Build a distribution package
run: poetry build -vvv
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ coverage:
poetry run pytest -vv --cov=sql_metadata --cov-report=term

lint:
poetry run pylint sql_metadata.py
poetry run pylint sql_metadata

publish:
# run git tag -a v0.0.0 before running make publish
Expand Down
191 changes: 166 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,48 +21,189 @@ Supported queries syntax:
pip install sql-metadata
```

### Extracting raw sql-metadata tokens

```python
from sql_metadata import Parser

# extract raw sql-metadata tokens
Parser("SELECT * FROM foo").tokens
# ['SELECT', '*', 'FROM', 'foo']
```

### Extracting columns from query

```python
>>> import sql_metadata
from sql_metadata import Parser

# get columns from query - for more examples see `test/test_getting_columns.py`
Parser("SELECT test, id FROM foo, bar").columns
# ['test', 'id']

Parser("INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')").columns
# ['article_id', 'user_id', 'time']

>>> sql_metadata.get_query_tokens("SELECT * FROM foo")
[<DML 'SELECT' at 0x7F14FFDEB808>, <Wildcard '*' at 0x7F14FFDEB940>, <Keyword 'FROM' at 0x7F14FFDEBBB0>, <Name 'foo' at 0x7F14FFDEB9A8>]
parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address")

>>> sql_metadata.get_query_columns("SELECT test, id FROM foo, bar")
[u'test', u'id']
# note that aliases are auto-resolved
parser.columns
# ['product_a.users.*', 'product_a.users.ip_address', 'product_b.users.ip_address']

>>> sql_metadata.get_query_tables("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address")
['product_a.users', 'product_b.users']
# note that you can also extract columns together with their place in the query;
# this returns a dict of lists keyed by select, where, order_by, join, insert and update
parser.columns_dict
# {'select': ['product_a.users.*'], 'join': ['product_a.users.ip_address', 'product_b.users.ip_address']}
```

### Extracting tables from query

>>> sql_metadata.get_query_columns("INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')")
['article_id', 'user_id', 'time']
```python
from sql_metadata import Parser

>>> sql_metadata.get_query_columns("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address")
['a.*', 'a.ip_address', 'b.ip_address']
# get tables from query - for more examples see `test/test_getting_tables.py`
Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address").tables
# ['product_a.users', 'product_b.users']

>>> sql_metadata.get_query_tables("SELECT test, id FROM foo, bar")
[u'foo', u'bar']
Parser("SELECT test, id FROM foo, bar").tables
# ['foo', 'bar']

>>> sql_metadata.get_query_limit_and_offset('SELECT foo_limit FROM bar_offset LIMIT 50 OFFSET 1000')
(50, 1000)
# you can also extract aliases of the tables as a dictionary
parser = Parser("SELECT f.test FROM foo AS f")

>>> sql_metadata.get_query_limit_and_offset('SELECT foo_limit FROM bar_offset limit 2000,50')
(50, 2000)
# get table aliases
parser.tables_aliases
# {'f': 'foo'}

>>> sql_metadata.get_query_table_aliases("SELECT test FROM foo AS f")
{'f': 'foo'}
# note that aliases are auto-resolved for columns
parser.columns
# ["foo.test"]
```

> See `test/test_query.py` file for more examples of a bit more complex queries.
### Extracting values from query
```python
from sql_metadata import Parser

parser = Parser(
"INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) "
"VALUES ('442001','27574631','20180228130846')"
)
# extract values from query
parser.values
# ["442001", "27574631", "20180228130846"]

# extract a dictionary with column-value pairs
parser.values_dict
#{"article_id": "442001", "user_id": "27574631", "time": "20180228130846"}

# if column names are not given, placeholder names are added automatically
parser = Parser(
"INSERT IGNORE INTO `table` VALUES (9, 2.15, '123', '2017-01-01');"
)
parser.values
# [9, 2.15, "123", "2017-01-01"]

parser.values_dict
#{"column_1": 9, "column_2": 2.15, "column_3": "123", "column_4": "2017-01-01"}
```

### Queries normalization

### Extracting limit and offset
```python
>>> from sql_metadata import generalize_sql
>>> generalize_sql('SELECT /* Test */ foo FROM bar WHERE id in (1, 2, 56)')
'SELECT foo FROM bar WHERE id in (XYZ)'
from sql_metadata import Parser

Parser('SELECT foo_limit FROM bar_offset LIMIT 50 OFFSET 1000').limit_and_offset
# (50, 1000)

Parser('SELECT foo_limit FROM bar_offset limit 2000,50').limit_and_offset
# (50, 2000)
```

### Extracting `WITH` statement names

```python
from sql_metadata import Parser

parser = Parser(
"""
WITH
database1.tableFromWith AS (SELECT aa.* FROM table3 as aa
left join table4 on aa.col1=table4.col2),
test as (select * from table3)
SELECT
"xxxxx"
FROM
database1.tableFromWith alias
LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
"""
)

# get names / aliases of WITH statements
parser.with_names
# ["database1.tableFromWith", "test"]

# note that names of with statements do not appear in tables
parser.tables
# ["table3", "table4", "database2.table2"]
```

### Extracting sub-queries

```python
from sql_metadata import Parser

parser = Parser(
"""
SELECT COUNT(1) FROM
(SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a
JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b
ON a.task_id = b.task_id;
"""
)

# get sub-queries dictionary
parser.subqueries
# {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
# "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}


# get names/ aliases of sub-queries / derived tables
parser.subqueries_names
# ["a", "b"]

# note that you can also exclude columns coming from sub-queries
# all columns
parser.columns
#["some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id",
# "task_type_id", "a.task_id", "b.task_id"]

# without subqueries
parser.columns_without_subqueries
#["some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id",
# "task_type_id"]
```

See the `test` directory for more examples with more complex queries.

### Queries normalization and comments extraction

```python
from sql_metadata import Parser
parser = Parser('SELECT /* Test */ foo FROM bar WHERE id in (1, 2, 56)')

# generalize query
parser.generalize
# 'SELECT foo FROM bar WHERE id in (XYZ)'

# remove comments
parser.without_comments
# 'SELECT foo FROM bar WHERE id in (1, 2, 56)'

# extract comments
parser.comments
# ['/* Test */']
```

> See `test/test_normalization.py` file for more examples of a bit more complex queries.
See the `test/test_normalization.py` file for more examples with more complex queries.

## Stargazers over time

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ homepage = "https://github.com/macbre/sql-metadata"
repository = "https://github.com/macbre/sql-metadata"

packages = [
{ include="sql_metadata.py" }
{ include="sql_metadata" }
]

[tool.poetry.dependencies]
Expand Down
Loading