Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 3 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ This will use the function [Return_Zero_Row_Tables](https://curts0.github.io/PyT
```python
import pytabular
model = pytabular.Tabular(CONNECTION_STR)
tables = pytabular.Return_Zero_Row_Tables(model)
tables = model.Tables.Find_Zero_Rows()
if len(tables) > 0:
model.Refresh(tables, Tracing = True) #Add a trace in there for some fun.
model.Refresh(tables)
```

#### Sneak in a refresh.
Expand All @@ -139,7 +139,7 @@ This will use the function [Table_Last_Refresh_Times](https://curts0.github.io/P
```python
import pytabular
model = pytabular.Tabular(CONNECTION_STR)
df = pytabular.Table_Last_Refresh_Times(model, group_partition = False)
df = model.Tables.Last_Refresh()
model.Create_Table(df, 'Refresh Times')
```

Expand All @@ -157,19 +157,6 @@ if len(results) > 0:
#Revert deployment here!
```

#### Backup & Revert a Table.
USE WITH CAUTION, obviously not in PROD. I have been experimenting with this concept. Made for selfish reasons. Will probably get removed and I'll keep in my own local version. But fun to work with. Uses two methods. [Backup_Table](https://curts0.github.io/PyTabular/Tabular/#backup_table) and [Revert_Table](https://curts0.github.io/PyTabular/Tabular/#revert_table)

```python
import pytabular
model = pytabular.Tabular(CONNECTION_STR)
model.Backup_Table('TableName') #This will backup the table with surround items (columns,measures,relationships,roles,hierarchies,etc.) and will add a suffix of '_backup'
#-----------#
#Make any changes to your original table and then revert or delete backup as necessary
#-----------#
model.Revert_Table('TableName') #This will essentially replace your original with _backup
```

#### Loop through and query Dax files
Let's say you have multiple DAX queries you would like to store and run through as checks. The [Query](https://curts0.github.io/PyTabular/Tabular/#query) method on the Tabular class can also take file paths. It can really be any file type, as it's just checking os.path.isfile(), but .dax or .txt is suggested. It will read the file and use its contents as the new Query_str argument.
```python
Expand Down
9 changes: 8 additions & 1 deletion mkgendocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pages:
classes:
- Base_Trace
- Refresh_Trace
- Query_Monitor
- page: "Best Practice Analyzer.md"
source: 'pytabular/best_practice_analyzer.py'
functions:
Expand Down Expand Up @@ -69,4 +70,10 @@ pages:
- pandas_datatype_to_tabular_datatype
- pd_dataframe_to_m_expression
- remove_folder_and_contents
- remove_suffix
- remove_suffix
- remove_file
- sql_wrap_count_around_query
- get_sub_list
- get_value_to_df
- dataframe_to_dict
- dict_to_markdown_table
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "python_tabular"
version = "0.2.8"
version = "0.2.9"
authors = [
{ name="Curtis Stallings", email="curtisrstallings@gmail.com" },
]
Expand Down
4 changes: 2 additions & 2 deletions pytabular/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@
from .pytabular import Tabular

from .basic_checks import (
Return_Zero_Row_Tables,
Table_Last_Refresh_Times,
BPA_Violations_To_DF,
Last_X_Interval,
Return_Zero_Row_Tables,
Table_Last_Refresh_Times,
)
from .logic_utils import (
pd_dataframe_to_m_expression,
Expand Down
22 changes: 16 additions & 6 deletions pytabular/basic_checks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
"""
All basic checks will eventually be deprecated for more intuitive methods in the right classes.
For example, instead of calling the function `Return_Zero_Row_Tables(model)`,
call the Find_Zero_Rows() method in the PyTables class.
That way you can dynamically run those methods on a subset of tables,
instead of only on the entire model.
"""
import logging
from typing import List, Union
import pytabular
Expand All @@ -11,16 +18,17 @@

def Return_Zero_Row_Tables(model: pytabular.Tabular) -> List[str]:
    """Return the names of tables whose row-count query comes back null.

    Deprecated helper: it logs a warning pointing at the replacement,
    ``Find_Zero_Rows()`` on the PyTables class (``model.Tables``).

    Args:
        model (pytabular.Tabular): Tabular Model

    Returns:
        List[str]: Table names where the ``COUNTROWS(_)`` result is null
            according to ``isna()``. A numeric ``0`` result is not matched.
    """
    logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}")
    # Adjacent string literals instead of a backslash continuation, so the
    # source indentation does not leak into the logged message. The message
    # names the method that actually exists on PyTables.
    logger.warning(
        "Return_Zero_Row_Tables() will be deprecated. "
        "Instead use Find_Zero_Rows() through the PyTables class."
    )
    query_function: str = "COUNTROWS(_)"
    # Only one query is run: the older model.Query_Every_Table() call was
    # immediately overwritten by this one and is itself being deprecated.
    df: pd.DataFrame = model.Tables.Query_All(query_function)
    return df[df[f"[{query_function}]"].isna()]["[Table]"].to_list()


Expand All @@ -31,16 +39,18 @@ def Table_Last_Refresh_Times(
Optional 'group_partition' variable, default is True.
If False an extra column will be include to have the last refresh time to the grain of the partition
Example to add to model model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes')

Args:
model (pytabular.Tabular): Tabular Model
group_partition (bool, optional): Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True.

Returns:
pd.DataFrame: pd dataframe with the RefreshedTime property: https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime
If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max()
"""
logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}")
logger.warning(
f"{sys._getframe(0).f_code.co_name} will be deprecated... Use Last_Refresh in PyTables class instead!"
)

data = {
"Tables": [partition.Table.Name for partition in model.Partitions],
"Partitions": [partition.Name for partition in model.Partitions],
Expand Down
25 changes: 24 additions & 1 deletion pytabular/column.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import pandas as pd
from object import PyObject, PyObjects

from Microsoft.AnalysisServices.Tabular import ColumnType

logger = logging.getLogger("PyTabular")

Expand Down Expand Up @@ -91,3 +91,26 @@ def Values(self) -> pd.DataFrame:
class PyColumns(PyObjects):
    """Collection of columns that can be queried together in one DAX call."""

    def __init__(self, objects) -> None:
        super().__init__(objects)

    def Query_All(self, query_function: str = "COUNTROWS(VALUES(_))") -> pd.DataFrame:
        """Run ``query_function`` against every column in this collection.

        One ``ROW(...)`` is built per column, with every ``_`` in
        ``query_function`` replaced by the column's ``'Table'[Column]``
        identifier. Columns whose type is ``ColumnType.RowNumber`` are skipped.
        Two or more rows are combined with ``UNION``; a single row is
        evaluated on its own because ``UNION`` needs at least two tables.

        Args:
            query_function (str, optional): DAX expression template in which
                ``_`` marks where the column identifier goes.
                Defaults to ``"COUNTROWS(VALUES(_))"``.

        Returns:
            pd.DataFrame: Result of the model's ``Query`` call, with the
                columns ``Table``, ``Column`` and ``query_function``.

        Raises:
            ValueError: If the collection holds no queryable column.
        """
        logger.info("Beginning execution of querying every column...")
        logger.debug(f"Function to be run: {query_function}")
        logger.debug("Dynamically creating DAX query...")
        rows = []
        for column in self:
            if column.Type == ColumnType.RowNumber:
                continue
            table_name = column.Table.get_Name()
            column_name = column.get_Name()
            # DAX escapes a delimiter inside a name by doubling it:
            # ' in quoted table names, ] in column names, " in string literals.
            dax_identifier = (
                "'" + table_name.replace("'", "''") + "'"
                "[" + column_name.replace("]", "]]") + "]"
            )
            table_literal = table_name.replace('"', '""')
            column_literal = column_name.replace('"', '""')
            expression = query_function.replace("_", dax_identifier)
            rows.append(
                f'ROW("Table","{table_literal}","Column","{column_literal}",'
                f'"{query_function}",{expression})'
            )
        if not rows:
            raise ValueError("No queryable columns to build a DAX query from.")
        if len(rows) == 1:
            query_str = f"EVALUATE {rows[0]}"
        else:
            query_str = "EVALUATE UNION(\n" + ",\n".join(rows) + ")"
        return self[0].Table.Model.Query(query_str)
49 changes: 49 additions & 0 deletions pytabular/logic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,52 @@ def get_value_to_df(Query: AdomdDataReader, index: int):
return Query.GetValue(index).ToDouble(Query.GetValue(index))
else:
return Query.GetValue(index)


def dataframe_to_dict(df):
    """Convert a DataFrame into a list of row dictionaries with tidy keys.

    Each column label is turned into a display name:
    - underscores (_) become spaces
    - the result is converted to Title Case

    Args:
        df: pandas DataFrame to convert.

    Returns:
        list: One dictionary per row, keyed by the tidied column labels.
    """
    # str(label) keeps non-string labels (e.g. integer column names) from
    # raising AttributeError on .replace().
    return [
        {str(label).replace("_", " ").title(): value for label, value in record.items()}
        for record in df.to_dict("records")
    ]


def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None):
    """Generate a Markdown table from a list of dictionaries.

    Column order is deterministic: keys appear in the order they are first
    seen across ``list_of_dicts``, or in the order given by
    ``columns_to_include`` when that is supplied.

    Args:
        list_of_dicts: List of dictionaries to convert. Each dictionary is one
            row; a key missing from a row renders as an empty cell.
        columns_to_include: Default None, and all columns are included.
            If a list is supplied, only those columns that exist in the data
            are included.

    Returns:
        str: The Markdown table, one line per row, joined with newlines.

    Example:
        columns = ['Referenced Object Type', 'Referenced Table']
        dict_to_markdown_table(dependencies, columns)

        Result:
        | Referenced Object Type | Referenced Table |
        |-----|-----|
        | TABLE | Cases |
        | COLUMN | Cases |
    """
    # dict.fromkeys de-duplicates while preserving first-seen order, unlike a
    # set, whose iteration order for strings changes between interpreter runs.
    keys = list(dict.fromkeys(key for row in list_of_dicts for key in row))

    if columns_to_include is not None:
        available = set(keys)
        keys = [
            column
            for column in dict.fromkeys(columns_to_include)
            if column in available
        ]

    table_header = f"| {' | '.join(map(str, keys))} |"
    table_header_separator = "|-----" * len(keys) + "|"
    markdown_table = [table_header, table_header_separator]
    markdown_table.extend(
        f"| {' | '.join(str(row.get(key, '')) for key in keys)} |"
        for row in list_of_dicts
    )
    return "\n".join(markdown_table)
10 changes: 6 additions & 4 deletions pytabular/pytabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,14 +463,15 @@ def Query_Every_Column(
self, query_function: str = "COUNTROWS(VALUES(_))"
) -> pd.DataFrame:
"""This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run.

Args:
query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions.

Returns:
pd.DataFrame: Returns dataframe with results.
"""
logger.info("Beginning execution of querying every column...")
logger.warning(
"Query_Every_Column will be deprecated... Use Query_All in PyTables class instead!"
)
logger.debug(f"Function to be run: {query_function}")
logger.debug("Dynamically creating DAX query...")
query_str = "EVALUATE UNION(\n"
Expand All @@ -487,13 +488,14 @@ def Query_Every_Column(
def Query_Every_Table(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame:
"""This will dynamically create a query to pull all tables from the model and run the query function.
It will replace the _ with the table to run.

Args:
query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. Defaults to 'COUNTROWS(_)'.

Returns:
pd.DataFrame: Returns dataframe with results
"""
logger.warning(
"Query_Every_Table will be deprecated... Use Query_All in PyTables class instead!"
)
logger.info("Beginning execution of querying every table...")
logger.debug(f"Function to be run: {query_function}")
logger.debug("Dynamically creating DAX query...")
Expand Down
71 changes: 71 additions & 0 deletions pytabular/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,74 @@ def __init__(self, objects) -> None:
def Refresh(self, *args, **kwargs):
    """Delegate to the owning model's ``Refresh`` for this collection.

    The model is read from the first object held in ``self._objects``. This
    collection is passed as the first argument, followed by any extra
    positional and keyword arguments.

    Returns:
        Whatever the model's ``Refresh`` call returns.
    """
    owning_model = self._objects[0].Model
    return owning_model.Refresh(self, *args, **kwargs)

def Query_All(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame:
    """Run ``query_function`` against every table in this collection.

    One ``ROW(...)`` is built per table, with every ``_`` in
    ``query_function`` replaced by the quoted table identifier. Two or more
    rows are combined with ``UNION``; a single row is evaluated on its own
    because ``UNION`` needs at least two tables.

    Args:
        query_function (str, optional): DAX expression template in which ``_``
            marks where the table identifier goes. Defaults to 'COUNTROWS(_)'.

    Returns:
        pd.DataFrame: Result of the model's ``Query`` call, with the columns
            ``Table`` and ``query_function``.

    Raises:
        ValueError: If the collection holds no tables.
    """
    logger.info("Querying every table in PyTables...")
    logger.debug(f"Function to be run: {query_function}")
    logger.debug("Dynamically creating DAX query...")
    rows = []
    for table in self:
        table_name = table.get_Name()
        # DAX escapes a delimiter inside a name by doubling it:
        # ' in quoted table names, " in string literals.
        dax_table_identifier = "'" + table_name.replace("'", "''") + "'"
        table_literal = table_name.replace('"', '""')
        expression = query_function.replace("_", dax_table_identifier)
        rows.append(
            f'ROW("Table","{table_literal}","{query_function}",{expression})'
        )
    if not rows:
        raise ValueError("No tables to build a DAX query from.")
    if len(rows) == 1:
        query_str = f"EVALUATE {rows[0]}"
    else:
        query_str = "EVALUATE UNION(\n" + ",\n".join(rows) + ")"
    return self[0].Model.Query(query_str)

def Find_Zero_Rows(self):
    """Return a new collection holding only the tables that report no rows.

    Runs ``COUNTROWS(_)`` over the collection via ``Query_All`` and keeps the
    tables whose result is null according to ``isna()``. The result is built
    with this collection's own class.
    """
    count_function: str = "COUNTROWS(_)"
    counts = self.Query_All(count_function)

    result_column = f"[{count_function}]"
    is_blank = counts[result_column].isna()
    empty_names = counts[is_blank]["[Table]"].to_list()
    logger.debug(f"Found {empty_names}")
    # Look each table up by name on this collection before re-wrapping.
    return self.__class__([self[name] for name in empty_names])

def Last_Refresh(self, group_partition: bool = True) -> pd.DataFrame:
    """Return the latest refresh time of the tables in this collection.

    Each partition of each table contributes one row holding the table name,
    the partition name and the value of ``partition.Last_Refresh()``.
    Example to add the result to a model:
    ``model.Create_Table(model.Tables.Last_Refresh(), 'RefreshTimes')``

    Args:
        group_partition (bool, optional): If True, collapse the rows to one
            per table, keeping the maximum refresh time. If False, keep one
            row per partition, including the ``Partitions`` column.
            Defaults to True.

    Returns:
        pd.DataFrame: Columns ``Tables`` and ``RefreshedTime``, plus
            ``Partitions`` when ``group_partition`` is False.
            NOTE(review): the value presumably mirrors the partition's
            RefreshedTime property - confirm against the partition class:
            https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime
    """
    # Single pass over the partitions instead of one pass per output column.
    table_names = []
    partition_names = []
    refreshed_times = []
    for table in self:
        for partition in table.Partitions:
            table_names.append(partition.Table.Name)
            partition_names.append(partition.Name)
            refreshed_times.append(partition.Last_Refresh())

    df = pd.DataFrame(
        {
            "Tables": table_names,
            "Partitions": partition_names,
            "RefreshedTime": refreshed_times,
        }
    )
    if not group_partition:
        logger.debug("Returning DF")
        return df

    logger.debug("Grouping together to grain of Table")
    return (
        df[["Tables", "RefreshedTime"]]
        .groupby(by=["Tables"])
        .max()
        .reset_index(drop=False)
    )
10 changes: 9 additions & 1 deletion pytabular/tabular_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,10 @@ def _refresh_handler(source, args):

class Refresh_Trace(Base_Trace):
"""Subclass of Base_Trace. For built-in Refresh Tracing.
Run by default when refreshing tables or partitions.

Args:
Base_Trace (_type_): _description_
Base_Trace (Base_Trace): Base_Trace Class
"""

def __init__(
Expand Down Expand Up @@ -297,6 +298,13 @@ def _query_monitor_handler(source, args):


class Query_Monitor(Base_Trace):
"""Subclass of Base_Trace. For built-in Query Monitoring.
If you want to see full query text, set logger to debug.

Args:
Base_Trace (Base_Trace): Base_Trace Class
"""

def __init__(
self,
Tabular_Class,
Expand Down
Loading