diff --git a/.flake8 b/.flake8 index d24977e..9214cb9 100644 --- a/.flake8 +++ b/.flake8 @@ -1,2 +1,2 @@ [flake8] -ignore = E501, E203, W503, F401 \ No newline at end of file +ignore = E501, W503 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 187ab13..f4068c5 100644 --- a/.gitignore +++ b/.gitignore @@ -167,6 +167,7 @@ __pycache__ /TE2 setup.py notes.txt +adhoc.py *.bim *.csv docs/_config.yml# Byte-compiled / optimized / DLL files diff --git a/README.md b/README.md index 00d6753..6f51858 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,12 @@ import pytabular model = pytabular.Tabular(CONNECTION_STR) ``` +I'm a big fan of logging; if you don't want any, just get the logger and disable it. +```python +import pytabular +pytabular.logger.disabled = True +``` + You can query your models with the Query method from your tabular class. For Dax Queries, it will need the full Dax syntax. See [EVALUATE example](https://dax.guide/st/evaluate/). This will return a [Pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html). If you are looking to return a single value, see below. Simply wrap your query in the the curly brackets. The method will take that single cell table and just return the individual value. You can also query your DMV. See below for example. See [PyTabular Docs for Query](https://curts0.github.io/PyTabular/Tabular/#query). ```python #Run basic queries @@ -80,10 +86,31 @@ model.Refresh({'Table Name':'Partition Name'}) #or any kind of weird combination like model.Refresh([{:,'Table Name':['Partition1','Partition2']},'Table Name','Table Name2']) +#You can even run through the Tables & Partition Attributes +model.Tables['Table Name'].Refresh() + +#or +model.Tables['Table Name'].Partitions['Partition Name'].Refresh() + #Add Tracing=True for simple Traces tracking the refresh. 
model.Refresh(['Table1','Table2'], Tracing=True) ``` +It's not uncommon to need to run some checks on specific Tables, Partitions, Columns, etc. +```python +#Get Row Count from model +model.Tables['Table Name'].Row_Count() + +#Get Last Refresh time from a partition +model.Tables['Table Name'].Last_Refresh() + +#Get Distinct Count or Values from a Column +model.Tables['Table Name'].Columns['Column Name'].Distinct_Count() +#or +model.Tables['Table Name'].Columns['Column Name'].Values() +``` + + ### Use Cases #### If blank table, then refresh table. diff --git a/pyproject.toml b/pyproject.toml index 9be8a69..4e3aab6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python_tabular" -version = "0.1.6" +version = "0.1.7" authors = [ { name="Curtis Stallings", email="curtisrstallings@gmail.com" }, ] @@ -13,7 +13,8 @@ dependencies = [ "clr-loader==0.1.7", "xmltodict==0.13.0", "pandas", - "requests" + "requests", + "rich" ] description = "Connect to your tabular model and perform operations programmatically" readme = "README.md" diff --git a/pytabular/__init__.py b/pytabular/__init__.py index 64947ed..02ddecf 100644 --- a/pytabular/__init__.py +++ b/pytabular/__init__.py @@ -3,11 +3,10 @@ import os import sys import platform +from rich.logging import RichHandler logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s::%(module)s::%(funcName)s::%(levelname)s::%(message)s", - datefmt="%y/%m/%d %H:%M:%S %z", + level=logging.DEBUG, format="%(message)s", datefmt="[%x]", handlers=[RichHandler()] ) logger = logging.getLogger("PyTabular") logger.setLevel(logging.INFO) diff --git a/pytabular/basic_checks.py b/pytabular/basic_checks.py index 8a1986d..9fe46c8 100644 --- a/pytabular/basic_checks.py +++ b/pytabular/basic_checks.py @@ -4,6 +4,7 @@ from logic_utils import ticks_to_datetime import sys import pandas as pd +from measure import PyMeasure logger = logging.getLogger("PyTabular") @@ 
-86,7 +87,7 @@ def BPA_Violations_To_DF(model: pytabular.Tabular, te2: str, bpa: str) -> pd.Dat def Last_X_Interval( Model: pytabular.Tabular, - Measure: Union[str, pytabular.pytabular.Measure], + Measure: Union[str, PyMeasure], Column_Name: Union[str, None] = None, Date_Column_Identifier: str = "'Date'[DATE_DTE_KEY]", Number_Of_Intervals: int = 90, @@ -105,7 +106,7 @@ def Last_X_Interval( Returns: pd.DataFrame: Pandas DataFrame of results. """ - if isinstance(Measure, str): + if isinstance(Measure, str): try: Measure = [ measure for measure in Model.Measures if measure.Name == Measure diff --git a/pytabular/column.py b/pytabular/column.py index ddfd248..cdc9c22 100644 --- a/pytabular/column.py +++ b/pytabular/column.py @@ -1,9 +1,7 @@ import logging - -from object import PyObject, PyObjects -from logic_utils import ticks_to_datetime import pandas as pd -from typing import List +from object import PyObject, PyObjects + logger = logging.getLogger("PyTabular") @@ -20,7 +18,15 @@ def __init__(self, object, table) -> None: super().__init__(object) self.Table = table - def Distinct_Count(self, No_Blank=False): + def Distinct_Count(self, No_Blank=False) -> int: + """Get [DISTINCTCOUNT](https://learn.microsoft.com/en-us/dax/distinctcount-function-dax) of Column. + + Args: + No_Blank (bool, optional): Ability to call [DISTINCTCOUNTNOBLANK](https://learn.microsoft.com/en-us/dax/distinctcountnoblank-function-dax). Defaults to False. + + Returns: + int: Number of Distinct Count from column. If `No_Blank == True` then will return number of Distinct Count no blanks. + """ func = "DISTINCTCOUNT" if No_Blank: func += "NOBLANK" @@ -28,6 +34,16 @@ def Distinct_Count(self, No_Blank=False): f"EVALUATE {{{func}('{self.Table.Name}'[{self.Name}])}}" ) + def Values(self) -> pd.DataFrame: + """Get single column DataFrame of [VALUES](https://learn.microsoft.com/en-us/dax/values-function-dax) + + Returns: + pd.DataFrame: Single Column DataFrame of Values. 
+ """ + return self.Table.Model.Adomd.Query( + f"EVALUATE VALUES('{self.Table.Name}'[{self.Name}])" + ) + class PyColumns(PyObjects): def __init__(self, objects) -> None: diff --git a/pytabular/measure.py b/pytabular/measure.py new file mode 100644 index 0000000..a1e22f4 --- /dev/null +++ b/pytabular/measure.py @@ -0,0 +1,23 @@ +import logging + +from object import PyObject, PyObjects + +logger = logging.getLogger("PyTabular") + + +class PyMeasure(PyObject): + """Wrapper for [Microsoft.AnalysisServices.Measure](https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.measure?view=analysisservices-dotnet). + With a few other bells and whistles added to it. WIP + + Args: + Table: Parent Table to the Measure + """ + + def __init__(self, object, table) -> None: + super().__init__(object) + self.Table = table + + +class PyMeasures(PyObjects): + def __init__(self, objects) -> None: + super().__init__(objects) diff --git a/pytabular/object.py b/pytabular/object.py index 0a00574..bead097 100644 --- a/pytabular/object.py +++ b/pytabular/object.py @@ -35,3 +35,10 @@ def __iter__(self): def __len__(self): return len(self._objects) + + def Find(self, object_str): + return [ + object + for object in self._objects + if object_str.lower() in object.Name.lower() + ] diff --git a/pytabular/partition.py b/pytabular/partition.py index 1fdf204..1a91aec 100644 --- a/pytabular/partition.py +++ b/pytabular/partition.py @@ -3,7 +3,6 @@ from object import PyObject, PyObjects from logic_utils import ticks_to_datetime import pandas as pd -from typing import List logger = logging.getLogger("PyTabular") diff --git a/pytabular/pytabular.py b/pytabular/pytabular.py index 4077afd..07d05f9 100644 --- a/pytabular/pytabular.py +++ b/pytabular/pytabular.py @@ -8,9 +8,8 @@ DataColumn, Partition, MPartitionSource, - Measure, ) -from Microsoft.AnalysisServices.AdomdClient import AdomdCommand, AdomdConnection + from Microsoft.AnalysisServices import UpdateOptions from typing import 
Any, Dict, List, Union from collections import namedtuple @@ -26,6 +25,9 @@ ) from query import Connection from table import PyTable, PyTables +from partition import PyPartitions +from column import PyColumns +from measure import PyMeasures from tabular_tracing import Refresh_Trace logger = logging.getLogger("PyTabular") @@ -89,11 +91,15 @@ def Reload_Model_Info(self) -> bool: self.Tables = PyTables( [PyTable(table, self) for table in self.Model.Tables.GetEnumerator()] ) - self.Measures = [ - measure - for table in self.Tables - for measure in table.Measures.GetEnumerator() - ] + self.Partitions = PyPartitions( + [partition for table in self.Tables for partition in table.Partitions] + ) + self.Columns = PyColumns( + [column for table in self.Tables for column in table.Columns] + ) + self.Measures = PyMeasures( + [measure for table in self.Tables for measure in table.Measures] + ) self.Database.Refresh() return True @@ -194,7 +200,7 @@ def find_table(table_str: str) -> Table: result = self.Model.Tables.Find(table_str) if result is None: raise Exception(f"Unable to find table! from {table_str}") - logging.debug(f"Found table {result.Name}") + logger.debug(f"Found table {result.Name}") return result def find_partition(table: Table, partition_str: str) -> Partition: @@ -203,7 +209,7 @@ def find_partition(table: Table, partition_str: str) -> Partition: raise Exception( f"Unable to find partition! 
{table.Name}|{partition_str}" ) - logging.debug(f"Found partition {result.Table.Name}|{result.Name}") + logger.debug(f"Found partition {result.Table.Name}|{result.Name}") return result def refresh(Object): @@ -619,6 +625,6 @@ def Create_Table(self, df: pd.DataFrame, table_name: str) -> bool: f"Adding table: {new_table.Name} to {self.Server.Name}::{self.Database.Name}::{self.Model.Name}" ) self.Model.Tables.Add(new_table) - self.Refresh([new_table]) + self.Refresh([new_table], Tracing=True) self.SaveChanges() return True diff --git a/pytabular/table.py b/pytabular/table.py index cfcc091..eb9f924 100644 --- a/pytabular/table.py +++ b/pytabular/table.py @@ -1,10 +1,9 @@ import logging - -from Microsoft.AnalysisServices.Tabular import Table from object import PyObject import pandas as pd from partition import PyPartition, PyPartitions from column import PyColumn, PyColumns +from measure import PyMeasure, PyMeasures from pytabular.object import PyObjects logger = logging.getLogger("PyTabular") @@ -32,6 +31,12 @@ def __init__(self, object, model) -> None: self.Columns = PyColumns( [PyColumn(column, self) for column in self._object.Columns.GetEnumerator()] ) + self.Measures = PyMeasures( + [ + PyMeasure(measure, self) + for measure in self._object.Measures.GetEnumerator() + ] + ) def Row_Count(self) -> int: """Method to return count of rows. 
Simple Dax Query: diff --git a/pytabular/tabular_tracing.py b/pytabular/tabular_tracing.py index 8943cd1..3632255 100644 --- a/pytabular/tabular_tracing.py +++ b/pytabular/tabular_tracing.py @@ -177,11 +177,68 @@ def _Query_DMV_For_Event_Categories(self): return Event_Categories -def refresh_handler(source, args): - if args.EventSubclass == TraceEventSubclass.ReadData: - logger.info(f"{args.ProgressTotal} - {args.ObjectPath}") +def _refresh_handler(source, args): + TextData = args.TextData.replace("<ccon>", "").replace("</ccon>", "") + + if ( + args.EventClass == TraceEventClass.ProgressReportCurrent + and args.EventSubclass == TraceEventSubclass.ReadData + ): + logger.info( + f"Total Rows Read {args.ProgressTotal} From Table '{args.ObjectPath.split('.')[-2]}' Partition '{args.ObjectPath.split('.')[-1]}' " + ) + + elif ( + args.EventClass == TraceEventClass.ProgressReportEnd + and args.EventSubclass == TraceEventSubclass.ReadData + ): + if args.ProgressTotal == 0: + logger.warning( + f"{'::'.join(args.ObjectPath.split('.')[-2:])} QUERIED {args.ProgressTotal} ROWS!" + ) + else: + logger.info( + f"Finished Reading {'::'.join(args.ObjectPath.split('.')[-2:])} for {args.ProgressTotal} Rows!" 
+ ) + + elif args.EventSubclass == TraceEventSubclass.SwitchingDictionary: + logger.warning(f"{TextData}") + + elif ( + args.EventClass == TraceEventClass.ProgressReportBegin + and args.EventSubclass + in [ + TraceEventSubclass.TabularSequencePoint, + TraceEventSubclass.TabularRefresh, + TraceEventSubclass.Process, + TraceEventSubclass.VertiPaq, + TraceEventSubclass.CompressSegment, + TraceEventSubclass.TabularCommit, + TraceEventSubclass.RelationshipBuildPrepare, + TraceEventSubclass.AnalyzeEncodeData, + TraceEventSubclass.ReadData, + ] + ): + logger.info(f"{TextData}") + + elif ( + args.EventClass == TraceEventClass.ProgressReportEnd + and args.EventSubclass + in [ + TraceEventSubclass.TabularSequencePoint, + TraceEventSubclass.TabularRefresh, + TraceEventSubclass.Process, + TraceEventSubclass.VertiPaq, + TraceEventSubclass.CompressSegment, + TraceEventSubclass.TabularCommit, + TraceEventSubclass.RelationshipBuildPrepare, + TraceEventSubclass.AnalyzeEncodeData, + ] + ): + logger.info(f"{TextData}") + else: - logger.info(f"{args.EventClass} - {args.EventSubclass} - {args.ObjectName}") + logger.debug(f"{args.EventClass}::{args.EventSubclass}::{TextData}") class Refresh_Trace(Base_Trace): @@ -211,6 +268,6 @@ def __init__( TraceColumn.EventClass, TraceColumn.ProgressTotal, ], - Handler: Callable = refresh_handler, + Handler: Callable = _refresh_handler, ) -> None: super().__init__(Tabular_Class, Trace_Events, Trace_Event_Columns, Handler)