From 3f97bae7d29a130615f6c96bb32d7ba2b45ce31b Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Mon, 2 Jan 2023 09:44:02 -0600 Subject: [PATCH 1/7] moving utils function to logic_utils file --- pytabular/logic_utils.py | 49 ++++++++++++++++++++++++++++++++++++++++ pytabular/utils.py | 47 -------------------------------------- 2 files changed, 49 insertions(+), 47 deletions(-) delete mode 100644 pytabular/utils.py diff --git a/pytabular/logic_utils.py b/pytabular/logic_utils.py index a5c02b3..59d16f7 100644 --- a/pytabular/logic_utils.py +++ b/pytabular/logic_utils.py @@ -219,3 +219,52 @@ def get_value_to_df(Query: AdomdDataReader, index: int): return Query.GetValue(index).ToDouble(Query.GetValue(index)) else: return Query.GetValue(index) + + +def dataframe_to_dict(df): + """ + Convert to Dataframe to dictionary and + alter columns names with; + - Underscores (_) to spaces + - All Strings are converted to Title Case. + """ + list_of_dicts = df.to_dict("records") + return [ + {k.replace("_", " ").title(): v for k, v in dict.items()} + for dict in list_of_dicts + ] + + +def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None): + """ + Description: Generate a Markdown table based on a list of dictionaries. + Args: + list_of_dicts -> List of Dictionaries that need to be converted + to a markdown table. + columns_to_include -> Default = None, and all colums are included. + If a list is supplied, those columns will be included. + Example: + columns = ['Referenced Object Type', 'Referenced Table', 'Referenced Object'] + dict_to_markdown_table(dependancies, columns) + + Result: + | Referenced Object Type | Referenced Table | Referenced Object | + | ---------------------- | ---------------- | ------------------------------- | + | TABLE | Cases | Cases | + | COLUMN | Cases | IsClosed | + | CALC_COLUMN | Cases | Resolution Time (Working Hours) | + + """ + keys = set().union(*[set(d.keys()) for d in list_of_dicts]) + + if columns_to_include is not None: + keys = list(keys.intersection(columns_to_include)) + + table_header = f"| {' | '.join(map(str, keys))} |" + table_header_separator = "|-----" * len(keys) + "|" + markdown_table = [table_header, table_header_separator] + + for row in list_of_dicts: + table_row = f"| {' | '.join(str(row.get(key, '')) for key in keys)} |" + markdown_table.append(table_row) + return "\n".join(markdown_table) diff --git a/pytabular/utils.py b/pytabular/utils.py deleted file mode 100644 index d8d304e..0000000 --- a/pytabular/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -def dataframe_to_dict(df): - """ - Convert to Dataframe to dictionary and - alter columns names with; - - Underscores (_) to spaces - - All Strings are converted to Title Case. - """ - list_of_dicts = df.to_dict("records") - return [ - {k.replace("_", " ").title(): v for k, v in dict.items()} - for dict in list_of_dicts - ] - - -def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None): - """ - Description: Generate a Markdown table based on a list of dictionaries. - Args: - list_of_dicts -> List of Dictionaries that need to be converted - to a markdown table. - columns_to_include -> Default = None, and all colums are included. - If a list is supplied, those columns will be included. - Example: - columns = ['Referenced Object Type', 'Referenced Table', 'Referenced Object'] - dict_to_markdown_table(dependancies, columns) - - Result: - | Referenced Object Type | Referenced Table | Referenced Object | - | ---------------------- | ---------------- | ------------------------------- | - | TABLE | Cases | Cases | - | COLUMN | Cases | IsClosed | - | CALC_COLUMN | Cases | Resolution Time (Working Hours) | - - """ - keys = set().union(*[set(d.keys()) for d in list_of_dicts]) - - if columns_to_include is not None: - keys = list(keys.intersection(columns_to_include)) - - table_header = f"| {' | '.join(map(str, keys))} |" - table_header_separator = "|-----" * len(keys) + "|" - markdown_table = [table_header, table_header_separator] - - for row in list_of_dicts: - table_row = f"| {' | '.join(str(row.get(key, '')) for key in keys)} |" - markdown_table.append(table_row) - return "\n".join(markdown_table) From 05a4a53fc2e94ddbc36c3cee0d2997203ca49328 Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Mon, 2 Jan 2023 10:58:04 -0600 Subject: [PATCH 2/7] starting to deprecate custom stuff --- pytabular/__init__.py | 2 -- pytabular/basic_checks.py | 56 -------------------------------- pytabular/column.py | 28 +++++++++++++++- pytabular/pytabular.py | 46 -------------------------- pytabular/table.py | 68 +++++++++++++++++++++++++++++++++++++++ test/test_2tabular.py | 14 ++++++-- test/test_3custom.py | 10 ------ 7 files changed, 107 insertions(+), 117 deletions(-) diff --git a/pytabular/__init__.py b/pytabular/__init__.py index d29ff16..167380c 100644 --- a/pytabular/__init__.py +++ b/pytabular/__init__.py @@ -49,8 +49,6 @@ from .pytabular import Tabular from .basic_checks import ( - Return_Zero_Row_Tables, - Table_Last_Refresh_Times, BPA_Violations_To_DF, Last_X_Interval, ) diff --git a/pytabular/basic_checks.py b/pytabular/basic_checks.py index 9fe46c8..ae09b1a 100644 --- a/pytabular/basic_checks.py +++ b/pytabular/basic_checks.py @@ -9,62 +9,6 @@ logger = logging.getLogger("PyTabular") -def Return_Zero_Row_Tables(model: pytabular.Tabular) -> List[str]: - """Returns list of table names of those that are returning isna() - - Args: - model (pytabular.Tabular): Tabular Model - - Returns: - List[str]: List of table names where DAX COUNTROWS('Table Name') is nan or 0. - """ - logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}") - query_function: str = "COUNTROWS(_)" - df: pd.DataFrame = model.Query_Every_Table(query_function) - return df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() - - -def Table_Last_Refresh_Times( - model: pytabular.Tabular, group_partition: bool = True -) -> pd.DataFrame: - """Returns pd.DataFrame of tables with their latest refresh time. - Optional 'group_partition' variable, default is True. - If False an extra column will be include to have the last refresh time to the grain of the partition - Example to add to model model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes') - - Args: - model (pytabular.Tabular): Tabular Model - group_partition (bool, optional): Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True. - - Returns: - pd.DataFrame: pd dataframe with the RefreshedTime property: https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime - If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max() - """ - logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}") - data = { - "Tables": [partition.Table.Name for partition in model.Partitions], - "Partitions": [partition.Name for partition in model.Partitions], - "RefreshedTime": [ - ticks_to_datetime(partition.RefreshedTime.Ticks).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ" - )[:-3] - for partition in model.Partitions - ], - } - df = pd.DataFrame(data) - if group_partition: - logger.debug("Grouping together to grain of Table") - return ( - df[["Tables", "RefreshedTime"]] - .groupby(by=["Tables"]) - .max() - .reset_index(drop=False) - ) - else: - logger.debug("Returning DF") - return df - - def BPA_Violations_To_DF(model: pytabular.Tabular, te2: str, bpa: str) -> pd.DataFrame: """Runs BPA Analyzer from TE2 and outputs result into a DF. diff --git a/pytabular/column.py b/pytabular/column.py index 341570a..e4e80c8 100644 --- a/pytabular/column.py +++ b/pytabular/column.py @@ -1,7 +1,7 @@ import logging import pandas as pd from object import PyObject, PyObjects - +from Microsoft.AnalysisServices.Tabular import ColumnType logger = logging.getLogger("PyTabular") @@ -91,3 +91,29 @@ def Values(self) -> pd.DataFrame: class PyColumns(PyObjects): def __init__(self, objects) -> None: super().__init__(objects) + + + def Query_All( + self, query_function: str = "COUNTROWS(VALUES(_))" + ) -> pd.DataFrame: + """This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run. + + Args: + query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. + + Returns: + pd.DataFrame: Returns dataframe with results. + """ + logger.info("Beginning execution of querying every column...") + logger.debug(f"Function to be run: {query_function}") + logger.debug("Dynamically creating DAX query...") + query_str = "EVALUATE UNION(\n" + columns = [column for column in self] + for column in columns: + if column.Type != ColumnType.RowNumber: + table_name = column.Table.get_Name() + column_name = column.get_Name() + dax_identifier = f"'{table_name}'[{column_name}]" + query_str += f"ROW(\"Table\",\"{table_name}\",\"Column\",\"{column_name}\",\"{query_function}\",{query_function.replace('_',dax_identifier)}),\n" + query_str = f"{query_str[:-2]})" + return self[0].Table.Model.Query(query_str) \ No newline at end of file diff --git a/pytabular/pytabular.py b/pytabular/pytabular.py index 3d3dba6..961cd58 100644 --- a/pytabular/pytabular.py +++ b/pytabular/pytabular.py @@ -459,52 +459,6 @@ def Query( return conn.Query(Query_Str) - def Query_Every_Column( - self, query_function: str = "COUNTROWS(VALUES(_))" - ) -> pd.DataFrame: - """This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run. - - Args: - query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. - - Returns: - pd.DataFrame: Returns dataframe with results. - """ - logger.info("Beginning execution of querying every column...") - logger.debug(f"Function to be run: {query_function}") - logger.debug("Dynamically creating DAX query...") - query_str = "EVALUATE UNION(\n" - columns = [column for table in self.Tables for column in table.Columns] - for column in columns: - if column.Type != ColumnType.RowNumber: - table_name = column.Table.get_Name() - column_name = column.get_Name() - dax_identifier = f"'{table_name}'[{column_name}]" - query_str += f"ROW(\"Table\",\"{table_name}\",\"Column\",\"{column_name}\",\"{query_function}\",{query_function.replace('_',dax_identifier)}),\n" - query_str = f"{query_str[:-2]})" - return self.Query(query_str) - - def Query_Every_Table(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: - """This will dynamically create a query to pull all tables from the model and run the query function. - It will replace the _ with the table to run. - - Args: - query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. Defaults to 'COUNTROWS(_)'. - - Returns: - pd.DataFrame: Returns dataframe with results - """ - logger.info("Beginning execution of querying every table...") - logger.debug(f"Function to be run: {query_function}") - logger.debug("Dynamically creating DAX query...") - query_str = "EVALUATE UNION(\n" - for table in self.Tables: - table_name = table.get_Name() - dax_table_identifier = f"'{table_name}'" - query_str += f"ROW(\"Table\",\"{table_name}\",\"{query_function}\",{query_function.replace('_',dax_table_identifier)}),\n" - query_str = f"{query_str[:-2]})" - return self.Query(query_str) - def Analyze_BPA( self, Tabular_Editor_Exe: str, Best_Practice_Analyzer: str ) -> List[str]: diff --git a/pytabular/table.py b/pytabular/table.py index 6a7be3d..6b1db65 100644 --- a/pytabular/table.py +++ b/pytabular/table.py @@ -100,3 +100,71 @@ def __init__(self, objects) -> None: def Refresh(self, *args, **kwargs): model = self._objects[0].Model return model.Refresh(self, *args, **kwargs) + + + def Query_All(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: + """This will dynamically create a query to pull all tables from the model and run the query function. + It will replace the _ with the table to run. + + Args: + query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. Defaults to 'COUNTROWS(_)'. + + Returns: + pd.DataFrame: Returns dataframe with results + """ + logger.info("Querying every table in PyTables...") + logger.debug(f"Function to be run: {query_function}") + logger.debug("Dynamically creating DAX query...") + query_str = "EVALUATE UNION(\n" + for table in self: + table_name = table.get_Name() + dax_table_identifier = f"'{table_name}'" + query_str += f"ROW(\"Table\",\"{table_name}\",\"{query_function}\",{query_function.replace('_',dax_table_identifier)}),\n" + query_str = f"{query_str[:-2]})" + return self[0].Model.Query(query_str) + + + def Zero_Row_Tables(self): + """Returns list of table names of those that are returning isna() + """ + query_function: str = "COUNTROWS(_)" + df: pd.DataFrame = self.Query_All(query_function) + return df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() + + + def Last_Refresh( + self, group_partition: bool = True + ) -> pd.DataFrame: + """Returns pd.DataFrame of tables with their latest refresh time. + Optional 'group_partition' variable, default is True. + If False an extra column will be include to have the last refresh time to the grain of the partition + Example to add to model model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes') + + Args: + model (pytabular.Tabular): Tabular Model + group_partition (bool, optional): Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True. + + Returns: + pd.DataFrame: pd dataframe with the RefreshedTime property: https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime + If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max() + """ + data = { + "Tables": [partition.Table.Name for table in self for partition in table.Partitions], + "Partitions": [partition.Name for table in self for partition in table.Partitions], + "RefreshedTime": [ + partition.Last_Refresh() + for table in self for partition in table.Partitions + ], + } + df = pd.DataFrame(data) + if group_partition: + logger.debug("Grouping together to grain of Table") + return ( + df[["Tables", "RefreshedTime"]] + .groupby(by=["Tables"]) + .max() + .reset_index(drop=False) + ) + else: + logger.debug("Returning DF") + return df diff --git a/test/test_2tabular.py b/test/test_2tabular.py index 7d08f98..54181dc 100644 --- a/test/test_2tabular.py +++ b/test/test_2tabular.py @@ -74,13 +74,13 @@ def test_nonetype_decimal_bug(model): @pytest.mark.parametrize("model", testing_parameters) def test_Table_Last_Refresh_Times(model): """Really just testing the the function completes successfully and returns df""" - assert isinstance(p.Table_Last_Refresh_Times(model), pd.DataFrame) is True + assert isinstance(model.Tables.Last_Refresh(), pd.DataFrame) is True @pytest.mark.parametrize("model", testing_parameters) def test_Return_Zero_Row_Tables(model): """Testing that `Return_Zero_Row_Tables`""" - assert isinstance(p.Return_Zero_Row_Tables(model), list) is True + assert isinstance(model.Tables.Zero_Row_Tables(), list) is True @pytest.mark.parametrize("model", testing_parameters) @@ -99,3 +99,13 @@ def test_get_sample_values(model): assert len(df) > 0 else: assert True + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_table(model): + assert len(model.Tables.Query_All()) > 0 + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_column(model): + assert len(model.Tables[0].Columns.Query_All()) > 0 \ No newline at end of file diff --git a/test/test_3custom.py b/test/test_3custom.py index 265ebda..1f9ea74 100644 --- a/test/test_3custom.py +++ b/test/test_3custom.py @@ -6,16 +6,6 @@ import pytest -@pytest.mark.parametrize("model", testing_parameters) -def test_query_every_table(model): - assert len(model.Query_Every_Table()) > 0 - - -@pytest.mark.parametrize("model", testing_parameters) -def test_query_every_column(model): - assert len(model.Query_Every_Column()) > 0 - - @pytest.mark.parametrize("model", testing_parameters) def test_backingup_table(model): model.Backup_Table(testingtablename) From 4400bfc56a28a8ed7261f3f7f8e864a2b6016e25 Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Sat, 7 Jan 2023 14:45:58 -0600 Subject: [PATCH 3/7] adding back function, to pu deprecation warning --- pytabular/__init__.py | 2 ++ pytabular/basic_checks.py | 66 +++++++++++++++++++++++++++++++++++++++ pytabular/pytabular.py | 48 ++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) diff --git a/pytabular/__init__.py b/pytabular/__init__.py index 167380c..9258181 100644 --- a/pytabular/__init__.py +++ b/pytabular/__init__.py @@ -51,6 +51,8 @@ from .basic_checks import ( BPA_Violations_To_DF, Last_X_Interval, + Return_Zero_Row_Tables, + Table_Last_Refresh_Times, ) from .logic_utils import ( pd_dataframe_to_m_expression, diff --git a/pytabular/basic_checks.py b/pytabular/basic_checks.py index ae09b1a..900fa8b 100644 --- a/pytabular/basic_checks.py +++ b/pytabular/basic_checks.py @@ -1,3 +1,10 @@ +""" +All basic checks will eventually be deprecated for more intuitive methods in the right classes. +For example, instead of calling the function `Return_Zero_Row_Tables(model)`, +call the Find_Zero_Rows() method in the PyTables class. +That way you can dynamically run those methods on a subset of tables, +instead of only on the entire model. +""" import logging from typing import List, Union import pytabular @@ -9,6 +16,65 @@ logger = logging.getLogger("PyTabular") +def Return_Zero_Row_Tables(model: pytabular.Tabular) -> List[str]: + """Returns list of table names of those that are returning isna() + Args: + model (pytabular.Tabular): Tabular Model + Returns: + List[str]: List of table names where DAX COUNTROWS('Table Name') is nan or 0. + """ + logger.warning( + "Return_Zero_Row_Tables() will be deprecated. \ + Instead use Zero_Row_Tables() through the PyTables class." + ) + query_function: str = "COUNTROWS(_)" + df: pd.DataFrame = model.Tables.Query_All(query_function) + return df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() + + +def Table_Last_Refresh_Times( + model: pytabular.Tabular, group_partition: bool = True +) -> pd.DataFrame: + """Returns pd.DataFrame of tables with their latest refresh time. + Optional 'group_partition' variable, default is True. + If False an extra column will be include to have the last refresh time to the grain of the partition + Example to add to model model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes') + Args: + model (pytabular.Tabular): Tabular Model + group_partition (bool, optional): Whether or not you want the grain of the dataframe to be by table or by partition. Defaults to True. + Returns: + pd.DataFrame: pd dataframe with the RefreshedTime property: https://docs.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.partition.refreshedtime?view=analysisservices-dotnet#microsoft-analysisservices-tabular-partition-refreshedtime + If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max() + """ + logger.info(f"Executing Basic Function {sys._getframe(0).f_code.co_name}") + logger.warning( + f"{sys._getframe(0).f_code.co_name} will be deprecated... Use Last_Refresh in PyTables class instead!" + ) + + data = { + "Tables": [partition.Table.Name for partition in model.Partitions], + "Partitions": [partition.Name for partition in model.Partitions], + "RefreshedTime": [ + ticks_to_datetime(partition.RefreshedTime.Ticks).strftime( + "%Y-%m-%dT%H:%M:%S.%fZ" + )[:-3] + for partition in model.Partitions + ], + } + df = pd.DataFrame(data) + if group_partition: + logger.debug("Grouping together to grain of Table") + return ( + df[["Tables", "RefreshedTime"]] + .groupby(by=["Tables"]) + .max() + .reset_index(drop=False) + ) + else: + logger.debug("Returning DF") + return df + + def BPA_Violations_To_DF(model: pytabular.Tabular, te2: str, bpa: str) -> pd.DataFrame: """Runs BPA Analyzer from TE2 and outputs result into a DF. diff --git a/pytabular/pytabular.py b/pytabular/pytabular.py index 961cd58..340a312 100644 --- a/pytabular/pytabular.py +++ b/pytabular/pytabular.py @@ -459,6 +459,54 @@ def Query( return conn.Query(Query_Str) + def Query_Every_Column( + self, query_function: str = "COUNTROWS(VALUES(_))" + ) -> pd.DataFrame: + """This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run. + Args: + query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. + Returns: + pd.DataFrame: Returns dataframe with results. + """ + logger.info("Beginning execution of querying every column...") + logger.warning( + "Query_Every_Column will be deprecated... Use Query_All in PyTables class instead!" + ) + logger.debug(f"Function to be run: {query_function}") + logger.debug("Dynamically creating DAX query...") + query_str = "EVALUATE UNION(\n" + columns = [column for table in self.Tables for column in table.Columns] + for column in columns: + if column.Type != ColumnType.RowNumber: + table_name = column.Table.get_Name() + column_name = column.get_Name() + dax_identifier = f"'{table_name}'[{column_name}]" + query_str += f"ROW(\"Table\",\"{table_name}\",\"Column\",\"{column_name}\",\"{query_function}\",{query_function.replace('_',dax_identifier)}),\n" + query_str = f"{query_str[:-2]})" + return self.Query(query_str) + + def Query_Every_Table(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: + """This will dynamically create a query to pull all tables from the model and run the query function. + It will replace the _ with the table to run. + Args: + query_function (str, optional): Dax query is dynamically building a query with the UNION & ROW DAX Functions. Defaults to 'COUNTROWS(_)'. + Returns: + pd.DataFrame: Returns dataframe with results + """ + logger.warning( + "Query_Every_Table will be deprecated... Use Query_All in PyTables class instead!" + ) + logger.info("Beginning execution of querying every table...") + logger.debug(f"Function to be run: {query_function}") + logger.debug("Dynamically creating DAX query...") + query_str = "EVALUATE UNION(\n" + for table in self.Tables: + table_name = table.get_Name() + dax_table_identifier = f"'{table_name}'" + query_str += f"ROW(\"Table\",\"{table_name}\",\"{query_function}\",{query_function.replace('_',dax_table_identifier)}),\n" + query_str = f"{query_str[:-2]})" + return self.Query(query_str) + def Analyze_BPA( self, Tabular_Editor_Exe: str, Best_Practice_Analyzer: str ) -> List[str]: From 33b2cbad578905fb01c11f2f1fca7dcc4769393b Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Sat, 7 Jan 2023 14:46:27 -0600 Subject: [PATCH 4/7] Replacement basic functions into class methods --- pytabular/column.py | 7 ++----- pytabular/table.py | 29 ++++++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pytabular/column.py b/pytabular/column.py index e4e80c8..55bda6f 100644 --- a/pytabular/column.py +++ b/pytabular/column.py @@ -92,10 +92,7 @@ class PyColumns(PyObjects): def __init__(self, objects) -> None: super().__init__(objects) - - def Query_All( - self, query_function: str = "COUNTROWS(VALUES(_))" - ) -> pd.DataFrame: + def Query_All(self, query_function: str = "COUNTROWS(VALUES(_))") -> pd.DataFrame: """This will dynamically create a query to pull all columns from the model and run the query function. It will replace the _ with the column to run. Args: @@ -116,4 +113,4 @@ def Query_All( dax_identifier = f"'{table_name}'[{column_name}]" query_str += f"ROW(\"Table\",\"{table_name}\",\"Column\",\"{column_name}\",\"{query_function}\",{query_function.replace('_',dax_identifier)}),\n" query_str = f"{query_str[:-2]})" - return self[0].Table.Model.Query(query_str) \ No newline at end of file + return self[0].Table.Model.Query(query_str) diff --git a/pytabular/table.py b/pytabular/table.py index 6b1db65..e825c1a 100644 --- a/pytabular/table.py +++ b/pytabular/table.py @@ -101,7 +101,6 @@ def Refresh(self, *args, **kwargs): model = self._objects[0].Model return model.Refresh(self, *args, **kwargs) - def Query_All(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: """This will dynamically create a query to pull all tables from the model and run the query function. It will replace the _ with the table to run. @@ -123,18 +122,17 @@ def Query_All(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame: query_str = f"{query_str[:-2]})" return self[0].Model.Query(query_str) - - def Zero_Row_Tables(self): - """Returns list of table names of those that are returning isna() - """ + def Find_Zero_Rows(self): + """Returns PyTables class of tables with zero rows queried.""" query_function: str = "COUNTROWS(_)" - df: pd.DataFrame = self.Query_All(query_function) - return df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() + df = self.Query_All(query_function) + table_names = df[df[f"[{query_function}]"].isna()]["[Table]"].to_list() + logger.debug(f"Found {table_names}") + tables = [self[name] for name in table_names] + return self.__class__(tables) - def Last_Refresh( - self, group_partition: bool = True - ) -> pd.DataFrame: + def Last_Refresh(self, group_partition: bool = True) -> pd.DataFrame: """Returns pd.DataFrame of tables with their latest refresh time. Optional 'group_partition' variable, default is True. If False an extra column will be include to have the last refresh time to the grain of the partition @@ -149,11 +147,16 @@ def Last_Refresh( If group_partition == True and the table has multiple partitions, then df.groupby(by["tables"]).max() """ data = { - "Tables": [partition.Table.Name for table in self for partition in table.Partitions], - "Partitions": [partition.Name for table in self for partition in table.Partitions], + "Tables": [ + partition.Table.Name for table in self for partition in table.Partitions + ], + "Partitions": [ + partition.Name for table in self for partition in table.Partitions + ], "RefreshedTime": [ partition.Last_Refresh() - for table in self for partition in table.Partitions + for table in self + for partition in table.Partitions ], } df = pd.DataFrame(data) From 8c8bcf4787c2ace2107a7efca2a0ff418fc28ec6 Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Sat, 7 Jan 2023 14:46:52 -0600 Subject: [PATCH 5/7] docstring documentation updates --- README.md | 19 +++---------------- mkgendocs.yml | 9 ++++++++- pytabular/tabular_tracing.py | 10 +++++++++- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 3b8247b..3f1a765 100644 --- a/README.md +++ b/README.md @@ -118,9 +118,9 @@ This will use the function [Return_Zero_Row_Tables](https://curts0.github.io/PyT ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) -tables = pytabular.Return_Zero_Row_Tables(model) +tables = model.Tables.Find_Zero_Rows() if len(tables) > 0: - model.Refresh(tables, Tracing = True) #Add a trace in there for some fun. + model.Refresh(tables) ``` #### Sneak in a refresh. @@ -139,7 +139,7 @@ This will use the function [Table_Last_Refresh_Times](https://curts0.github.io/P ```python import pytabular model = pytabular.Tabular(CONNECTION_STR) -df = pytabular.Table_Last_Refresh_Times(model, group_partition = False) +df = model.Tables.Last_Refresh() model.Create_Table(df, 'Refresh Times') ``` @@ -157,19 +157,6 @@ if len(results) > 0: #Revert deployment here! ``` -#### Backup & Revert a Table. -USE WITH CAUTION, obviously not in PROD. I have been experimenting with this concept. Made for selfish reasons. Will probably get removed and I'll keep in my own local version. But fun to work with. Uses two methods. [Backup_Table](https://curts0.github.io/PyTabular/Tabular/#backup_table) and [Revert_Table](https://curts0.github.io/PyTabular/Tabular/#revert_table) - -```python -import pytabular -model = pytabular.Tabular(CONNECTION_STR) -model.Backup_Table('TableName') #This will backup the table with surround items (columns,measures,relationships,roles,hierarchies,etc.) and will add a suffix of '_backup' -#-----------# -#Make any changes to your original table and then revert or delete backup as necessary -#-----------# -model.Revert_Table('TableName') #This will essentially replace your original with _backup -``` - #### Loop through and query Dax files Let's say you have multiple dax queries you would like to store and run through as checks. The [Query](https://curts0.github.io/PyTabular/Tabular/#query) method on the Tabular class can also take file paths. Can really be any file type as it's just checking os.path.isfile(). But would suggest .dax or .txt. It will read the file that use that as the new Query_str argument. ```python diff --git a/mkgendocs.yml b/mkgendocs.yml index b97c61e..1185b9c 100644 --- a/mkgendocs.yml +++ b/mkgendocs.yml @@ -39,6 +39,7 @@ pages: classes: - Base_Trace - Refresh_Trace + - Query_Monitor - page: "Best Practice Analyzer.md" source: 'pytabular/best_practice_analyzer.py' functions: @@ -69,4 +70,10 @@ pages: - pandas_datatype_to_tabular_datatype - pd_dataframe_to_m_expression - remove_folder_and_contents - - remove_suffix \ No newline at end of file + - remove_suffix + - remove_file + - sql_wrap_count_around_query + - get_sub_list + - get_value_to_df + - dataframe_to_dict + - dict_to_markdown_table \ No newline at end of file diff --git a/pytabular/tabular_tracing.py b/pytabular/tabular_tracing.py index d24bb50..e5c2589 100644 --- a/pytabular/tabular_tracing.py +++ b/pytabular/tabular_tracing.py @@ -249,9 +249,10 @@ def _refresh_handler(source, args): class Refresh_Trace(Base_Trace): """Subclass of Base_Trace. For built-in Refresh Tracing. + Run by default when refreshing tables or partitions. Args: - Base_Trace (_type_): _description_ + Base_Trace (Base_Trace): Base_Trace Class """ def __init__( @@ -297,6 +298,13 @@ def _query_monitor_handler(source, args): class Query_Monitor(Base_Trace): + """Subclass of Base_Trace. For built-in Query Monitoring. + If you want to see full query text, set logger to debug. + + Args: + Base_Trace (Base_Trace): Base_Trace Class + """ + def __init__( self, Tabular_Class, From d9ba88976c678ffc67487f31253c280feab8a625 Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Sat, 7 Jan 2023 14:47:10 -0600 Subject: [PATCH 6/7] tests for deprecated functions --- test/test_2tabular.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test/test_2tabular.py b/test/test_2tabular.py index 54181dc..1f9697d 100644 --- a/test/test_2tabular.py +++ b/test/test_2tabular.py @@ -80,7 +80,7 @@ def test_Table_Last_Refresh_Times(model): @pytest.mark.parametrize("model", testing_parameters) def test_Return_Zero_Row_Tables(model): """Testing that `Return_Zero_Row_Tables`""" - assert isinstance(model.Tables.Zero_Row_Tables(), list) is True + assert isinstance(model.Tables.Find_Zero_Rows(), p.pytabular.PyTables) is True @pytest.mark.parametrize("model", testing_parameters) @@ -108,4 +108,14 @@ def test_query_every_table(model): @pytest.mark.parametrize("model", testing_parameters) def test_query_every_column(model): - assert len(model.Tables[0].Columns.Query_All()) > 0 \ No newline at end of file + assert len(model.Tables[0].Columns.Query_All()) > 0 + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_table_deprecate(model): + assert len(model.Query_Every_Table()) > 0 + + +@pytest.mark.parametrize("model", testing_parameters) +def test_query_every_column_deprecate(model): + assert len(model.Query_Every_Column()) > 0 From dde1a2a5a71355077e0f9b774df7684ec6fe65b4 Mon Sep 17 00:00:00 2001 From: Curtis Stallings Date: Sat, 7 Jan 2023 14:47:15 -0600 Subject: [PATCH 7/7] 0.2.9 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5a2637b..ad4462e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python_tabular" -version = "0.2.8" +version = "0.2.9" authors = [ { name="Curtis Stallings", email="curtisrstallings@gmail.com" }, ]