diff --git a/.gitignore b/.gitignore index f4068c5..19286f7 100644 --- a/.gitignore +++ b/.gitignore @@ -133,6 +133,9 @@ venv.bak/ # mkdocs documentation /site +# Output for model documentation +/model-docs + # mypy .mypy_cache/ .dmypy.json @@ -174,3 +177,6 @@ docs/_config.yml# Byte-compiled / optimized / DLL files *.py[cod] *$py.class +# Test files for new functionality +test-notebook.ipynb +test.py diff --git a/pytabular/__init__.py b/pytabular/__init__.py index 5fcb03c..f2de7d9 100644 --- a/pytabular/__init__.py +++ b/pytabular/__init__.py @@ -69,5 +69,7 @@ from .best_practice_analyzer import BPA from .query import Connection from .pbi_helper import find_local_pbi_instances +from .document import ModelDocumenter + logger.info("Import successful...") diff --git a/pytabular/column.py b/pytabular/column.py index 6be807d..fa60cb4 100644 --- a/pytabular/column.py +++ b/pytabular/column.py @@ -35,6 +35,11 @@ def __init__(self, object, table) -> None: self._display.add_row("State", str(self._object.State)) self._display.add_row("DisplayFolder", str(self._object.DisplayFolder)) + def get_dependencies(self) -> pd.DataFrame: + """Returns the dependencies of a column""" + dmv_query = f"select * from $SYSTEM.DISCOVER_CALC_DEPENDENCY where [OBJECT] = '{self.Name}' and [TABLE] = '{self.Table.Name}'" + return self.Table.Model.Query(dmv_query) + def get_sample_values(self, top_n: int = 3) -> pd.DataFrame: """Get sample values of column.""" column_to_sample = f"'{self.Table.Name}'[{self.Name}]" diff --git a/pytabular/culture.py b/pytabular/culture.py index 5fa7bca..01de1cc 100644 --- a/pytabular/culture.py +++ b/pytabular/culture.py @@ -17,25 +17,41 @@ def __init__(self, object, model) -> None: super().__init__(object) self.Model = model self._display.add_row("Culture Name", self._object.Name) - self.ObjectTranslations = PyObjectTranslations( - [ - PyObjectTranslation(translation, self) - for translation in self._object.ObjectTranslations.GetEnumerator() - ] - ) - - -class 
PyObjectTranslation(PyObject): - """Wrapper for [ObjectTranslation](https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.objecttranslation?view=analysisservices-dotnet)""" - - def __init__(self, object, culture) -> None: - self.Name = object.Object.Name - self.ObjectType = object.Object.ObjectType - self.Parent = object.Object.Parent - super().__init__(object) - self.Culture = culture - self._display.add_row("Object Property", str(self._object.Property)) - self._display.add_row("Object Value", self._object.Value) + self.ObjectTranslations = self.set_translation() + + def set_translation(self) -> list[dict]: + """ + Based on the culture, it creates a list of dicts + with all the available translations in the file. + """ + return [ + { + "object_translation": translation.Value, + "object_name": translation.Object.Name, + "object_parent_name": translation.Object.Parent.Name, + "object_type": str(translation.Property), + } + for translation in self._object.ObjectTranslations + ] + + def get_translation( + self, object_name: str, object_parent_name: str, object_type: str = "Caption" + ) -> dict: + """ + Get Translation makes it possible to search for a specific translation of an object. + By default it will search for the "Caption" object type, due to the fact that a + Display folder and Description can also have translations. 
+ """ + if translations := [ + d + for d in self.ObjectTranslations + if d["object_name"] == object_name + and d["object_type"] == object_type + and d["object_parent_name"] == object_parent_name + ]: + return translations[0] + + return {"object_not_found": "Not Available"} class PyCultures(PyObjects): @@ -43,10 +59,3 @@ class PyCultures(PyObjects): def __init__(self, objects) -> None: super().__init__(objects) - - -class PyObjectTranslations(PyObjects): - """Houses grouping of `PyObjectTranslation`.""" - - def __init__(self, objects) -> None: - super().__init__(objects) diff --git a/pytabular/document.py b/pytabular/document.py new file mode 100644 index 0000000..ccec1e4 --- /dev/null +++ b/pytabular/document.py @@ -0,0 +1,509 @@ +""" +`document.py` is where a specific part of pytabular starts. This module can +generate pages in markdown for use in Docusaurus. +""" +import logging + +from pathlib import Path + +from measure import PyMeasure +from table import PyTable +from column import PyColumn +from culture import PyCulture + +from .pytabular import Tabular + +logger = logging.getLogger("PyTabular") + + +class ModelDocumenter: + """ + The ModelDocumenter class can generate documentation based on the + tabular object model and it will generate it suitable for docusaurus. + + TODO: Add a General Pages template with Roles and RLS Expressions + TODO: Create a Sub Page per table for all columns, instead of one big page? + TODO: Add Dependencies per Measure with correct links. 
+ """ + + def __init__( + self, + model: Tabular, + friendly_name: str = str(), + save_location: str = "docs", + general_page_url: str = "1-general-information.md", + measure_page_url: str = "2-measures.md", + table_page_url: str = "3-tables.md", + column_page_url: str = "4-columns.md", + roles_page_url: str = "5-roles.md", + ): + self.model = model + self.model_name = friendly_name or model.Catalog or model.Database.Name + self.friendly_name: str = str() + self.save_path: Path + self.save_location: str = save_location + + # Translation information + self.culture_include: bool = False + self.culture_selected: str = "en-US" + self.culture_object: PyCulture = None + + # Documentation Parts + self.general_page: str = str() + self.measure_page: str = str() + self.table_page: str = str() + self.column_page: str = str() + self.roles_page: str = str() + + self.category_file_name: str = "_category_.yml" + self.general_page_url: str = general_page_url + self.measure_page_url: str = measure_page_url + self.table_page_url: str = table_page_url + self.column_page_url: str = column_page_url + self.roles_page_url: str = roles_page_url + + # Generate a URL-friendly name for the model / folder + self.friendly_name: str = self.set_model_friendly_name() + + # Initialize Save path so checks can be run against it. + self.save_path = self.set_save_path() + + def create_object_reference(self, object: str, object_parent: str) -> str: + """ + Create a Custom ID for link sections in the docs. + This is based on the technical names in the model, + not the ones in the translations. This makes it + possible to link based on dependencies. + """ + url_reference = f"{object_parent}-{object}".replace(" ", "") + return f"{{#{url_reference}}}" + + def generate_documentation_pages(self) -> None: + """ + Generate Documentation for each specific part of the model. 
+ """ + self.measure_page = self.generate_markdown_measure_page() + self.table_page = self.generate_markdown_table_page() + self.column_page = self.generate_markdown_column_page() + self.category_page = self.generate_category_file() + + def get_object_caption(self, object_name: str, object_parent: str): + """ + Retrieves the caption of an object, based on the translations + in the culture. + """ + + if self.culture_include: + return self.culture_object.get_translation( + object_name=object_name, object_parent_name=object_parent + ).get("object_translation") + + return object_name + + def set_translations( + self, enable_translations: bool = False, culture: str = "en-US" + ) -> None: + """ + Set translations to active or inactive, depending on the needs of the users. + """ + + logger.info(f"Using Translations set to > {enable_translations}") + + if enable_translations: + try: + self.culture_object = self.model.Cultures[culture] + self.culture_selected = culture + self.culture_include = enable_translations + except IndexError: + self.culture_include = False + logger.warn( + "Culture not found, reverting back to orginal setting > False" + ) + else: + logger.info(f"Setting culture to {self.culture_selected}") + + else: + self.culture_include = enable_translations + + def set_model_friendly_name(self): + """ + Replaces the model name to a friendly string, + so it can be used in an URL. + """ + return (self.model_name).replace(" ", "-").replace("_", "-").lower() + + def set_save_path(self) -> Path: + """ + Set the location of the documentation + """ + return Path(f"{self.save_location}/{self.friendly_name}") + + def save_page(self, content: str, page_name: str, keep_file: bool = False) -> None: + """Save the content of the documentation to a file, based on + the class setup. + - Save Location + - Model Friendly Name + - Page to be written + + Args: + content (str): File content to write to file. + page_name (str): Name of the file that will be used. 
+ keep_file (bool): The file will only be overwritten if + the keep_file is set to False. + + Returns: + None + + """ + target_file = self.save_path / page_name + + if keep_file and target_file.exists(): + logger.info(f"{page_name} already exists -> fill will not overwritten.") + else: + logger.info(f"Results are written to -> {page_name}.") + + with target_file.open("w", encoding="utf-8") as f: + f.write(content) + f.close() + + def save_documentation(self) -> None: + """Generate documentation of the model, based on the meta-data + in the model definitions. This first checks if the folder + exists, and then starts to export the files that are needed + for the documentation. + - General Information Page -> Free format page to create. + - Measure Page -> Describes the measures in the model. (Incl. OLS?) + - Tables Page -> Describes the tables in the model. (Incl. OLS?) + - Columns Page -> Describes all columns in the model. (Incl. OLS?) + - Roles Page -> Describes the roles in the model, (incl. RLS?) + + Args: + self (Docs): Model object for documentation. + + Returns: + None + """ + if self.save_path.exists(): + logger.info( + f"Path exists -> Generating documentation for {self.friendly_name}" + ) + else: + logger.info( + f"Path does not exist -> Creating directory for {self.friendly_name}" + ) + self.save_path.mkdir(parents=True, exist_ok=True) + + if self.category_page: + self.save_page( + content=self.category_page, + keep_file=True, + page_name=self.category_file_name, + ) + + # Create General information page. 
+ if self.general_page: + self.save_page( + content="General Info", keep_file=True, page_name=self.general_page_url + ) + + if self.measure_page: + self.save_page( + content=self.measure_page, + keep_file=False, + page_name=self.measure_page_url, + ) + + if self.table_page: + self.save_page( + content=self.table_page, keep_file=False, page_name=self.table_page_url + ) + + if self.column_page: + self.save_page( + content=self.column_page, + keep_file=False, + page_name=self.column_page_url, + ) + + if self.roles_page: + self.save_page( + content=self.roles_page, keep_file=False, page_name=self.roles_page_url + ) + + def create_markdown_for_measure(self, object: PyMeasure) -> str: + """ + Create Markdown for a specific measure, that can later on be used + for generating the whole measure page. + """ + object_caption = ( + self.get_object_caption( + object_name=object.Name, object_parent=object.Parent.Name + ) + or object.Name + ) + + object_description = (object.Description or "No Description available").replace( + "\\n", "" + ) + + return f""" +### {object_caption} +**Description**: +> {object_description} + +
+ +
Display Folder
+
{object.DisplayFolder}
+ +
Table Name
+
{object.Parent.Name}
+ +
Format String
+
{object.FormatString}
+ +
Is Hidden
+
{object.IsHidden}
+ +
+ +```dax title="Technical: {object.Name}" +{ + object.Expression +} +``` + +--- +""" + + def generate_markdown_measure_page(self) -> str: + """ + Based on the measure objects it generates a page based on + the docusaurus notation for markdown pages. + """ + prev_display_folder = "" + markdown_template = [ + f"""--- +sidebar_position: 3 +title: Measures +description: This page contains all measures for the {self.model.Name} model, including the description, format string, and other technical details. +--- + +# Measures for {self.model.Name} +""" + ] + + measures = sorted( + self.model.Measures, key=lambda x: x.DisplayFolder, reverse=False + ) + + for measure in measures: + logger.debug(f"Creating docs for {measure.Name}") + display_folder = measure.DisplayFolder or "Other" + display_folder = display_folder.split("\\")[0] + + if prev_display_folder != display_folder: + markdown_template.append(f"""## {display_folder}""") + prev_display_folder = display_folder + + markdown_template.append(self.create_markdown_for_measure(measure)) + + return "".join(markdown_template) + + def create_markdown_for_table(self, object: PyTable) -> str: + """ + This functions returns the markdwon that can be used + for the documentation page for Tables. + + Args: + object: PyTable -> Based on the PyTabular Package. + + Returns: + Markdown text: str -> Will be append to the page text. 
+ """ + object_caption = ( + self.get_object_caption( + object_name=object.Name, object_parent=object.Parent.Name + ) + or object.Name + ) + + object_description = (object.Description or "No Description available").replace( + "\\n", "" + ) + + partition_type = '' + partition_source = '' + + if str(object.Partitions[0].SourceType) == 'Calculated': + partition_type = 'dax' + partition_source = object.Partitions[0].Source.Expression + elif str(object.Partitions[0].SourceType) == 'M': + partition_type = 'powerquery' + partition_source = object.Partitions[0].Source.Expression + else: + partition_type = 'sql' + partition_source = object.Partitions[0].Source.Query + + return f""" +### {object_caption} +**Description**: +> {object_description} + +
+
Measures (#)
+
{len(object.Measures)}
+ +
Columns (#)
+
{len(object.Columns)}
+ +
Partitions (#)
+
{len(object.Partitions)}
+ +
Data Category
+
{object.DataCategory or "Regular Table"}
+ +
Is Hidden
+
{object.IsHidden}
+ +
Table Type
+
{object.Partitions[0].ObjectType}
+ +
Source Type
+
{object.Partitions[0].SourceType}
+
+ +```{partition_type} title="Table Source: {object.Name}" +{ + partition_source +} +``` + +--- + +""" + + def generate_markdown_table_page(self) -> str: + """ + This function generates the markdown tables documentation for the tables in the Model. + """ + markdown_template = f"""--- +sidebar_position: 2 +title: Tables +description: This page contains all columns with tables for {self.model.Name}, including the description, and technical details. +--- + +# Tables {self.model.Name} + + """ + + for table in self.model.Tables: + markdown_template += self.create_markdown_for_table(table) + + return markdown_template + + def generate_markdown_column_page(self) -> str: + """ + This function generates the markdown for documentation about columns in the Model. + """ + markdown_template = f"""--- +sidebar_position: 4 +title: Columns +description: This page contains all columns with Columns for {self.model.Name}, including the description, format string, and other technical details. +--- + + """ + + for table in self.model.Tables: + markdown_template += f""" +## Columns for {table.Name} + """ + + for column in table.Columns: + if "RowNumber" in column.Name: + continue + + markdown_template += self.create_markdown_for_column(column) + + return markdown_template + + def create_markdown_for_column(self, object: PyColumn) -> str: + """ + Generates the Markdown for a specifc column. + If a colums is calculated, then it also shows + the expression for that column in DAX. + """ + object_caption = ( + self.get_object_caption( + object_name=object.Name, object_parent=object.Parent.Name + ) + or object.Name + ) + + object_description = (object.Description or "No Description available").replace( + "\\n", "" + ) + + basic_info = f""" +### {object_caption} {self.create_object_reference(object=object.Name, object_parent=object.Parent.Name)} +**Description**: +> {object_description} + +
+
Column Name
+
{object.Name}
+ +
Object Type
+
{object.ObjectType}
+ +
Type
+
{object.Type}
+ +
Is Available In Excel
+
{object.IsAvailableInMDX}
+ +
Is Hidden
+
{object.IsHidden}
+ +
Data Category
+
{object.DataCategory}
+ +
Data Type
+
{object.DataType}
+ +
DisplayFolder
+
{object.DisplayFolder}
+ +
+""" + + if str(object.Type) == "Calculated": + basic_info += f""" +```dax title="Technical: {object.Name}" +{ + object.Expression +} +``` + """ + return ( + basic_info + + """ +--- + """ + ) + + def generate_category_file(self): + """ + Docusaurs can generate an index. The category yaml will + make that happen. + """ + return f"""position: 2 # float position is supported +label: '{self.model_name}' +collapsible: true # make the category collapsible +collapsed: true # keep the category open by default +link: + type: generated-index + title: Documentation Overview +customProps: + description: To be added in the future. +""" diff --git a/pytabular/logic_utils.py b/pytabular/logic_utils.py index b858370..ac5e7c2 100644 --- a/pytabular/logic_utils.py +++ b/pytabular/logic_utils.py @@ -97,7 +97,7 @@ def m_list_expression_generator(list_of_strings: List[str]) -> str: f"Iterating through rows to build expression... df has {len(df)} rows..." ) expression_list_rows = [] - for index, row in df.iterrows(): + for _, row in df.iterrows(): expression_list_rows += [m_list_expression_generator(row.to_list())] expression_str += f"\u007b\n{','.join(expression_list_rows)}\n\u007d)\nin\nSource" return expression_str @@ -177,12 +177,17 @@ def get_value_to_df(Query: AdomdDataReader, index: int): return Query.GetValue(index) -def dataframe_to_dict(df): - """ - Convert to Dataframe to dictionary and - alter columns names with; +def dataframe_to_dict(df: pd.DataFrame) -> list[dict]: + """Convert to Dataframe to dictionary and alter columns names with; - Underscores (_) to spaces - All Strings are converted to Title Case. + + Args: + df (pd.DataFrame): Original table that needs to be converted + to a list with dicts. + + Returns: + list of dictionaries. 
""" list_of_dicts = df.to_dict("records") return [ @@ -191,14 +196,18 @@ def dataframe_to_dict(df): ] -def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None): +def dict_to_markdown_table(list_of_dicts: list, columns_to_include: list = None) -> str: """ Description: Generate a Markdown table based on a list of dictionaries. Args: - list_of_dicts -> List of Dictionaries that need to be converted - to a markdown table. - columns_to_include -> Default = None, and all colums are included. - If a list is supplied, those columns will be included. + list_of_dicts (list): List of Dictionaries that need to be converted + to a markdown table. + columns_to_include (list): Default = None, and all colums are included. + If a list is supplied, those columns will be included. + + Returns: + String that will represent a table in Markdown. + Example: columns = ['Referenced Object Type', 'Referenced Table', 'Referenced Object'] dict_to_markdown_table(dependancies, columns) diff --git a/pytabular/measure.py b/pytabular/measure.py index e54c553..3999547 100644 --- a/pytabular/measure.py +++ b/pytabular/measure.py @@ -27,7 +27,16 @@ def __init__(self, object, table) -> None: self._display.add_row("FormatString", self._object.FormatString) def get_dependencies(self) -> pd.DataFrame: - """Returns the dependant columns of a measure""" + """Returns the dependant objects of a measure. + Args: + self: The Measure Object + + Returns: + pd.DataFrame: The Return Value is a Pandas dataframe + which displays all the dependancies + of the object. 
+ + """ dmv_query = f"select * from $SYSTEM.DISCOVER_CALC_DEPENDENCY where [OBJECT] = '{self.Name}' and [TABLE] = '{self.Table.Name}'" return self.Table.Model.Query(dmv_query) diff --git a/pytabular/pbi_helper.py b/pytabular/pbi_helper.py index 6c45035..016e9d3 100644 --- a/pytabular/pbi_helper.py +++ b/pytabular/pbi_helper.py @@ -15,15 +15,29 @@ def get_msmdsrv() -> list: list: returns ProcessId(s) in list format to account for multiple PBIX files open at the same time. """ p.logger.debug("Retrieving msmdsrv.exe(s)") - msmdsrv = subprocess.check_output( - [ - "powershell", - """Get-CimInstance -ClassName Win32_Process -Property * -Filter "Name = 'msmdsrv.exe'" | Select-Object -Property ProcessId -ExpandProperty ProcessId""", - ] - ) - msmdsrv_id = msmdsrv.decode().strip().splitlines() - p.logger.debug(f"ProcessId for msmdsrv.exe {msmdsrv_id}") - return msmdsrv_id + + try: + msmdsrv = subprocess.check_output( + [ + "powershell", + """Get-CimInstance -ClassName Win32_Process -Property * -Filter "Name = 'msmdsrv.exe'" | Select-Object -Property ProcessId -ExpandProperty ProcessId""", + ] + ) + + msmdsrv_id = msmdsrv.decode().strip().splitlines() + p.logger.debug(f"ProcessId for msmdsrv.exe {msmdsrv_id}") + return msmdsrv_id + + except subprocess.CalledProcessError as e: + p.logger.error( + f"command '{e.cmd}' return with error (code {e.returncode}): {e.output}" + ) + p.logger.warn( + "Check if powershell is availabe in the PATH environment variables." 
+ ) + raise RuntimeError( + f"command '{e.cmd}' return with error (code {e.returncode}): {e.output}" + ) from e def get_port_number(msmdsrv: str) -> str: diff --git a/pytabular/pytabular.py b/pytabular/pytabular.py index 9b85bbd..636802b 100644 --- a/pytabular/pytabular.py +++ b/pytabular/pytabular.py @@ -82,6 +82,7 @@ def __init__(self, CONNECTION_STR: str): self.Adomd: Connection = Connection(self.Server) self.Effective_Users: dict = {} self.PyRefresh = PyRefresh + # Build PyObjects self.Reload_Model_Info() @@ -116,13 +117,6 @@ def Reload_Model_Info(self) -> bool: """ self.Database.Refresh() - self.Cultures = PyCultures( - [ - PyCulture(culture, self) - for culture in self.Model.Cultures.GetEnumerator() - ] - ) - self.Tables = PyTables( [PyTable(table, self) for table in self.Model.Tables.GetEnumerator()] ) @@ -141,6 +135,13 @@ def Reload_Model_Info(self) -> bool: self.Measures = PyMeasures( [measure for table in self.Tables for measure in table.Measures] ) + + self.Cultures = PyCultures( + [ + PyCulture(culture, self) + for culture in self.Model.Cultures.GetEnumerator() + ] + ) return True def Is_Process(self) -> bool: diff --git a/pytabular/refresh.py b/pytabular/refresh.py index eb650f5..b46f51f 100644 --- a/pytabular/refresh.py +++ b/pytabular/refresh.py @@ -367,7 +367,7 @@ def _refresh_report(self, Property_Changes) -> pd.DataFrame: refresh_data, columns=["Table", "Partition", "Refreshed Time"] ) - def Run(self) -> None: + def Run(self) -> pd.DataFrame: """Brings it all together. When ready, executes all the pre checks. Then refreshes. Then runs all the post checks. """ diff --git a/test/adventureworks/AdventureWorks Sales.pbix b/test/adventureworks/AdventureWorks Sales.pbix index cf8791e..53ec09f 100644 Binary files a/test/adventureworks/AdventureWorks Sales.pbix and b/test/adventureworks/AdventureWorks Sales.pbix differ