diff --git a/README.md b/README.md index 3b8247b..3f1a765 100644 --- a/README.md +++ b/README.md @@ -118,9 +118,9 @@ This will use the function [Return_Zero_Row_Tables](https://curts0.github.io/PyT ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) -tables = pytabular.Return_Zero_Row_Tables(model) +tables = model.Tables.Find_Zero_Rows() if len(tables) > 0: - model.Refresh(tables, Tracing = True) #Add a trace in there for some fun. + model.Refresh(tables) ``` #### Sneak in a refresh. @@ -139,7 +139,7 @@ This will use the function [Table_Last_Refresh_Times](https://curts0.github.io/P ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) -df = pytabular.Table_Last_Refresh_Times(model, group_partition = False) +df = model.Tables.Last_Refresh() model.Create_Table(df, 'Refresh Times') ``` @@ -157,19 +157,6 @@ if len(results) > 0: #Revert deployment here! ``` -#### Backup & Revert a Table. -USE WITH CAUTION, obviously not in PROD. I have been experimenting with this concept. Made for selfish reasons. Will probably get removed and I'll keep in my own local version. But fun to work with. Uses two methods. [Backup_Table](https://curts0.github.io/PyTabular/Tabular/#backup_table) and [Revert_Table](https://curts0.github.io/PyTabular/Tabular/#revert_table) - -```python -import pytabular -model = pytabular.Tabular(CONNECTION_STR) -model.Backup_Table('TableName') #This will backup the table with surround items (columns,measures,relationships,roles,hierarchies,etc.) and will add a suffix of '_backup' -#-----------# -#Make any changes to your original table and then revert or delete backup as necessary -#-----------# -model.Revert_Table('TableName') #This will essentially replace your original with _backup -``` - #### Loop through and query Dax files Let's say you have multiple dax queries you would like to store and run through as checks. 
The [Query](https://curts0.github.io/PyTabular/Tabular/#query) method on the Tabular class can also take file paths. Can really be any file type as it's just checking os.path.isfile(). But would suggest .dax or .txt. It will read the file and use its contents as the new Query_str argument. ```python diff --git a/mkgendocs.yml b/mkgendocs.yml index b97c61e..1185b9c 100644 --- a/mkgendocs.yml +++ b/mkgendocs.yml @@ -39,6 +39,7 @@ pages: classes: - Base_Trace - Refresh_Trace + - Query_Monitor - page: "Best Practice Analyzer.md" source: 'pytabular/best_practice_analyzer.py' functions: @@ -69,4 +70,10 @@ pages: - pandas_datatype_to_tabular_datatype - pd_dataframe_to_m_expression - remove_folder_and_contents - - remove_suffix \ No newline at end of file + - remove_suffix + - remove_file + - sql_wrap_count_around_query + - get_sub_list + - get_value_to_df + - dataframe_to_dict + - dict_to_markdown_table \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 5a2637b..ad4462e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python_tabular" -version = "0.2.8" +version = "0.2.9" authors = [ { name="Curtis Stallings", email="curtisrstallings@gmail.com" }, ] diff --git a/pytabular/__init__.py b/pytabular/__init__.py index d29ff16..9258181 100644 --- a/pytabular/__init__.py +++ b/pytabular/__init__.py @@ -49,10 +49,10 @@ from .pytabular import Tabular from .basic_checks import ( - Return_Zero_Row_Tables, - Table_Last_Refresh_Times, BPA_Violations_To_DF, Last_X_Interval, + Return_Zero_Row_Tables, + Table_Last_Refresh_Times, ) from .logic_utils import ( pd_dataframe_to_m_expression, diff --git a/pytabular/basic_checks.py b/pytabular/basic_checks.py index 9fe46c8..900fa8b 100644 --- a/pytabular/basic_checks.py +++ b/pytabular/basic_checks.py @@ -1,3 +1,10 @@ +""" +All basic checks will eventually be deprecated for more intuitive methods in the right classes. 
+For example, instead of calling the function `Return_Zero_Row_Tables(model)`, +call the Find_Zero_Rows() method in the PyTables class. +That way you can dynamically run those methods on a subset of tables, +instead of only on the entire model. +""" import logging from typing import List, Union import pytabular @@ -11,16 +18,17 @@ def Return_Zero_Row_Tables(model: pytabular.Tabular) -> List[str]: """Returns list of table names of those that are returning isna() - Args: model (pytabular.Tabular): Tabular Model - Returns: List[str]: List of table names where DAX COUNTROWS('Table Name') is nan or 0. """ - logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}") + logger.warning( + "Return_Zero_Row_Tables() will be deprecated. \ + Instead use Zero_Row_Tables() through the PyTables class." + ) query_function: str = "COUNTROWS(_)" - df: pd.DataFrame = model.Query_Every_Table(query_function) + df: pd.DataFrame = model.Tables.Query_All(query_function) return df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() @@ -31,16 +39,18 @@ def Table_Last_Refresh_Times( Optional 'group_partition' variable, default is True. If False an extra column will be include to have the last refresh time to the grain of the partition Example to add to model model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes') - Args: model (pytabular.Tabular): Tabular Model group_partition (bool, optional): Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True. 
- Returns: pd.DataFrame: pd dataframe with the RefreshedTime property: https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max() """ logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}") + logger.warning( + f"{sys._getframe(0).f_code.co_name} will be deprecated... Use Last_Refresh in PyTables class instead!" + ) + data = { "Tables": [partition.Table.Name for partition in model.Partitions], "Partitions": [partition.Name for partition in model.Partitions], diff --git a/pytabular/column.py b/pytabular/column.py index 341570a..55bda6f 100644 --- a/pytabular/column.py +++ b/pytabular/column.py @@ -1,7 +1,7 @@ import logging import pandas as pd from object import PyObject, PyObjects - +from Microsoft.AnalysisServices.Tabular import ColumnType logger = logging.getLogger("PyTabular") @@ -91,3 +91,26 @@ def Values(self) -> pd.DataFrame: class PyColumns(PyObjects): def __init__(self, objects) -> None: super().__init__(objects) + + def Query_All(self, query_function: str = "COUNTROWS(VALUES(_))") -> pd.DataFrame: + """This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run. + + Args: + query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. + + Returns: + pd.DataFrame: Returns dataframe with results. 
+ """ + logger.info("Beginning execution of querying every column...") + logger.debug(f"Function to be run: {query_function}") + logger.debug("Dynamically creating DAX query...") + query_str = "EVALUATE UNION(\n" + columns = [column for column in self] + for column in columns: + if column.Type != ColumnType.RowNumber: + table_name = column.Table.get_Name() + column_name = column.get_Name() + dax_identifier = f"'{table_name}'[{column_name}]" + query_str += f"ROW(\"Table\",\"{table_name}\",\"Column\",\"{column_name}\",\"{query_function}\",{query_function.replace('_',dax_identifier)}),\n" + query_str = f"{query_str[:-2]})" + return self[0].Table.Model.Query(query_str) diff --git a/pytabular/logic_utils.py b/pytabular/logic_utils.py index a5c02b3..59d16f7 100644 --- a/pytabular/logic_utils.py +++ b/pytabular/logic_utils.py @@ -219,3 +219,52 @@ def get_value_to_df(Query: AdomdDataReader, index: int): return Query.GetValue(index).ToDouble(Query.GetValue(index)) else: return Query.GetValue(index) + + +def dataframe_to_dict(df): + """ + Convert to Dataframe to dictionary and + alter columns names with; + - Underscores (_) to spaces + - All Strings are converted to Title Case. + """ + list_of_dicts = df.to_dict("records") + return [ + {k.replace("_", " ").title(): v for k, v in dict.items()} + for dict in list_of_dicts + ] + + +def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None): + """ + Description: Generate a Markdown table based on a list of dictionaries. + Args: + list_of_dicts -> List of Dictionaries that need to be converted + to a markdown table. + columns_to_include -> Default = None, and all colums are included. + If a list is supplied, those columns will be included. 
+ Example: + columns = ['Referenced Object Type', 'Referenced Table', 'Referenced Object'] + dict_to_markdown_table(dependancies, columns) + + Result: + | Referenced Object Type | Referenced Table | Referenced Object | + | ---------------------- | ---------------- | ------------------------------- | + | TABLE | Cases | Cases | + | COLUMN | Cases | IsClosed | + | CALC_COLUMN | Cases | Resolution Time (Working Hours) | + + """ + keys = set().union(*[set(d.keys()) for d in list_of_dicts]) + + if columns_to_include is not None: + keys = list(keys.intersection(columns_to_include)) + + table_header = f"| {' | '.join(map(str, keys))} |" + table_header_separator = "|-----" * len(keys) + "|" + markdown_table = [table_header, table_header_separator] + + for row in list_of_dicts: + table_row = f"| {' | '.join(str(row.get(key, '')) for key in keys)} |" + markdown_table.append(table_row) + return "\n".join(markdown_table) diff --git a/pytabular/pytabular.py b/pytabular/pytabular.py index 3d3dba6..340a312 100644 --- a/pytabular/pytabular.py +++ b/pytabular/pytabular.py @@ -463,14 +463,15 @@ def Query_Every_Column( self, query_function: str = "COUNTROWS(VALUES(_))" ) -> pd.DataFrame: """This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run. - Args: query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. - Returns: pd.DataFrame: Returns dataframe with results. """ logger.info("Beginning execution of querying every column...") + logger.warning( + "Query_Every_Column will be deprecated... Use Query_All in PyTables class instead!" 
+ ) logger.debug(f"Function to be run: {query_function}") logger.debug("Dynamically creating DAX query...") query_str = "EVALUATE UNION(\n" @@ -487,13 +488,14 @@ def Query_Every_Column( def Query_Every_Table(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: """This will dynamically create a query to pull all tables from the model and run the query function. It will replace the _ with the table to run. - Args: query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. Defaults to 'COUNTROWS(_)'. - Returns: pd.DataFrame: Returns dataframe with results """ + logger.warning( + "Query_Every_Table will be deprecated... Use Query_All in PyTables class instead!" + ) logger.info("Beginning execution of querying every table...") logger.debug(f"Function to be run: {query_function}") logger.debug("Dynamically creating DAX query...") diff --git a/pytabular/table.py b/pytabular/table.py index 6a7be3d..e825c1a 100644 --- a/pytabular/table.py +++ b/pytabular/table.py @@ -100,3 +100,74 @@ def __init__(self, objects) -> None: def Refresh(self, *args, **kwargs): model = self._objects[0].Model return model.Refresh(self, *args, **kwargs) + + def Query_All(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: + """This will dynamically create a query to pull all tables from the model and run the query function. + It will replace the _ with the table to run. + + Args: + query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. Defaults to 'COUNTROWS(_)'. 
+ + Returns: + pd.DataFrame: Returns dataframe with results + """ + logger.info("Querying every table in PyTables...") + logger.debug(f"Function to be run: {query_function}") + logger.debug("Dynamically creating DAX query...") + query_str = "EVALUATE UNION(\n" + for table in self: + table_name = table.get_Name() + dax_table_identifier = f"'{table_name}'" + query_str += f"ROW(\"Table\",\"{table_name}\",\"{query_function}\",{query_function.replace('_',dax_table_identifier)}),\n" + query_str = f"{query_str[:-2]})" + return self[0].Model.Query(query_str) + + def Find_Zero_Rows(self): + """Returns PyTables class of tables with zero rows queried.""" + query_function: str = "COUNTROWS(_)" + df = self.Query_All(query_function) + + table_names = df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() + logger.debug(f"Found {table_names}") + tables = [self[name] for name in table_names] + return self.__class__(tables) + + def Last_Refresh(self, group_partition: bool = True) -> pd.DataFrame: + """Returns pd.DataFrame of tables with their latest refresh time. + Optional 'group_partition' variable, default is True. + If False an extra column will be include to have the last refresh time to the grain of the partition + Example to add to model model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes') + + Args: + model (pytabular.Tabular): Tabular Model + group_partition (bool, optional): Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True. 
+ + Returns: + pd.DataFrame: pd dataframe with the RefreshedTime property: https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime + If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max() + """ + data = { + "Tables": [ + partition.Table.Name for table in self for partition in table.Partitions + ], + "Partitions": [ + partition.Name for table in self for partition in table.Partitions + ], + "RefreshedTime": [ + partition.Last_Refresh() + for table in self + for partition in table.Partitions + ], + } + df = pd.DataFrame(data) + if group_partition: + logger.debug("Grouping together to grain of Table") + return ( + df[["Tables", "RefreshedTime"]] + .groupby(by=["Tables"]) + .max() + .reset_index(drop=False) + ) + else: + logger.debug("Returning DF") + return df diff --git a/pytabular/tabular_tracing.py b/pytabular/tabular_tracing.py index d24bb50..e5c2589 100644 --- a/pytabular/tabular_tracing.py +++ b/pytabular/tabular_tracing.py @@ -249,9 +249,10 @@ def _refresh_handler(source, args): class Refresh_Trace(Base_Trace): """Subclass of Base_Trace. For built-in Refresh Tracing. + Run by default when refreshing tables or partitions. Args: - Base_Trace (_type_): _description_ + Base_Trace (Base_Trace): Base_Trace Class """ def __init__( @@ -297,6 +298,13 @@ def _query_monitor_handler(source, args): class Query_Monitor(Base_Trace): + """Subclass of Base_Trace. For built-in Query Monitoring. + If you want to see full query text, set logger to debug. 
+ + Args: + Base_Trace (Base_Trace): Base_Trace Class + """ + def __init__( self, Tabular_Class, diff --git a/pytabular/utils.py b/pytabular/utils.py deleted file mode 100644 index d8d304e..0000000 --- a/pytabular/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -def dataframe_to_dict(df): - """ - Convert to Dataframe to dictionary and - alter columns names with; - - Underscores (_) to spaces - - All Strings are converted to Title Case. - """ - list_of_dicts = df.to_dict("records") - return [ - {k.replace("_", " ").title(): v for k, v in dict.items()} - for dict in list_of_dicts - ] - - -def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None): - """ - Description: Generate a Markdown table based on a list of dictionaries. - Args: - list_of_dicts -> List of Dictionaries that need to be converted - to a markdown table. - columns_to_include -> Default = None, and all colums are included. - If a list is supplied, those columns will be included. - Example: - columns = ['Referenced Object Type', 'Referenced Table', 'Referenced Object'] - dict_to_markdown_table(dependancies, columns) - - Result: - | Referenced Object Type | Referenced Table | Referenced Object | - | ---------------------- | ---------------- | ------------------------------- | - | TABLE | Cases | Cases | - | COLUMN | Cases | IsClosed | - | CALC_COLUMN | Cases | Resolution Time (Working Hours) | - - """ - keys = set().union(*[set(d.keys()) for d in list_of_dicts]) - - if columns_to_include is not None: - keys = list(keys.intersection(columns_to_include)) - - table_header = f"| {' | '.join(map(str, keys))} |" - table_header_separator = "|-----" * len(keys) + "|" - markdown_table = [table_header, table_header_separator] - - for row in list_of_dicts: - table_row = f"| {' | '.join(str(row.get(key, '')) for key in keys)} |" - markdown_table.append(table_row) - return "\n".join(markdown_table) diff --git a/test/test_2tabular.py b/test/test_2tabular.py index 7d08f98..1f9697d 100644 --- 
a/test/test_2tabular.py +++ b/test/test_2tabular.py @@ -74,13 +74,13 @@ def test_nonetype_decimal_bug(model): @pytest.mark.parametrize("model", testing_parameters) def test_Table_Last_Refresh_Times(model): """Really just testing the the function completes successfully and returns df""" - assert isinstance(p.Table_Last_Refresh_Times(model), pd.DataFrame) is True + assert isinstance(model.Tables.Last_Refresh(), pd.DataFrame) is True @pytest.mark.parametrize("model", testing_parameters) def test_Return_Zero_Row_Tables(model): """Testing that `Return_Zero_Row_Tables`""" - assert isinstance(p.Return_Zero_Row_Tables(model), list) is True + assert isinstance(model.Tables.Find_Zero_Rows(), p.pytabular.PyTables) is True @pytest.mark.parametrize("model", testing_parameters) @@ -99,3 +99,23 @@ def test_get_sample_values(model): assert len(df) > 0 else: assert True + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_table(model): + assert len(model.Tables.Query_All()) > 0 + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_column(model): + assert len(model.Tables[0].Columns.Query_All()) > 0 + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_table_deprecate(model): + assert len(model.Query_Every_Table()) > 0 + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_column_deprecate(model): + assert len(model.Query_Every_Column()) > 0 diff --git a/test/test_3custom.py b/test/test_3custom.py index 265ebda..1f9ea74 100644 --- a/test/test_3custom.py +++ b/test/test_3custom.py @@ -6,16 +6,6 @@ import pytest -@pytest.mark.parametrize("model", testing_parameters) -def test_query_every_table(model): - assert len(model.Query_Every_Table()) > 0 - - -@pytest.mark.parametrize("model", testing_parameters) -def test_query_every_column(model): - assert len(model.Query_Every_Column()) > 0 - - @pytest.mark.parametrize("model", testing_parameters) def test_backingup_table(model): 
model.Backup_Table(testingtablename)