Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 30 additions & 17 deletions pytabular/pytabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@


class Tabular(PyObject):
"""Tabular Class to perform operations.
"""This is the Tabular Class to perform operations.

This is the main class to work with in PyTabular.
You can connect to the other classes via the supplied attributes.
Expand All @@ -50,13 +50,14 @@ class Tabular(PyObject):
[link](https://learn.microsoft.com/en-us/analysis-services/instances/connection-string-properties-analysis-services)

Attributes:
AdomdConnection (Connection): For querying.
This is the `Connection` class.
Tables (PyTables): See `PyTables` for more information.
Iterate through your tables in your model.
Columns (PyColumns): See `PyColumns` for more information.
Partitions (PyPartitions): See `PyPartitions` for more information.
Measures (PyMeasures): See `PyMeasures` for more information.
Adomd (Connection): For querying.
This is the `Connection` class.
Tables (PyTables): See `PyTables` for more information.
Iterate through your tables in your model.
Columns (PyColumns): See `PyColumns` for more information.
Partitions (PyPartitions): See `PyPartitions` for more information.
Measures (PyMeasures): See `PyMeasures` for more information.
PyRefresh (PyRefresh): See `PyRefresh` for more information.
"""

def __init__(self, connection_str: str):
Expand Down Expand Up @@ -85,7 +86,7 @@ def __init__(self, connection_str: str):
logger.info(f"Connected to Model - {self.Model.Name}")
self.Adomd: Connection = Connection(self.Server)
self.effective_users: dict = {}
self.PyRefresh = PyRefresh
self.PyRefresh: PyRefresh = PyRefresh

# Build PyObjects
self.reload_model_info()
Expand Down Expand Up @@ -158,7 +159,7 @@ def is_process(self) -> bool:
to see if any processing is happening.

Returns:
bool: True if DMV shows Process, False if not.
bool: True if DMV shows Process, False if not.
"""
_jobs_df = self.query("select * from $SYSTEM.DISCOVER_JOBS")
return len(_jobs_df[_jobs_df["JOB_DESCRIPTION"] == "Process"]) > 0
Expand Down Expand Up @@ -243,7 +244,7 @@ def property_changes(property_changes_var):
)

def backup_table(self, table_str: str) -> bool:
"""Will be removed.
"""This will be removed.

Used in conjunction with `revert_table()`.
"""
Expand Down Expand Up @@ -338,7 +339,7 @@ def clone_role_permissions():
return True

def revert_table(self, table_str: str) -> bool:
"""Will be removed.
"""This will be removed.

This is used in conjunction with `backup_table()`.
"""
Expand Down Expand Up @@ -428,19 +429,31 @@ def dename(items):
def query(
self, query_str: str, effective_user: str = None
) -> Union[pd.DataFrame, str, int]:
"""Executes query on model.
"""Executes a query on model.

See `Connection().query()` for details on execution.

Args:
query_str (str): Query string to execute.
effective_user (str, optional): Pass through an effective user
if desired. It will create and store a new `Connection()` class if needed,
which will help with speed if looping through multiple users in a row.
Defaults to None.
if desired. It will create and store a new `Connection()` class if needed,
which will help with speed if looping through multiple users in a row.
Defaults to None.

Returns:
Union[pd.DataFrame, str, int]: _description_
Union[pd.DataFrame, str, int]: Depending on query, will return DataFrame
or single value.
Example:
```python
model.query("EVALUATE {1}")

model.query("EVALUATE TOPN(5, 'Customer')")

model.query(
"EVALUATE VALUES('Sales Region'[Region])",
effective_user = "user@company.com"
)
```
"""
if effective_user is None:
return self.Adomd.query(query_str)
Expand Down
14 changes: 14 additions & 0 deletions pytabular/query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
"""`query.py` houses a custom `Connection` class that uses the .Net AdomdConnection.

`Connection` is created automatically when connecting to your model.

Example:
```python title="query from model"
import pytabular as p
model = p.Tabular(CONNECTION_STR)
model.query("EVALUATE {1}")
```

```python title="pass an effective user"
model.query(
"EVALUATE {1}",
effective_user = "user@company.com"
)
```
"""
import logging
import os
Expand Down
27 changes: 26 additions & 1 deletion pytabular/refresh.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
"""`refresh.py` is the main file to handle all the components of refreshing your model.

See the `PyTable(s)` and `PyPartition(s)` classes. Use the `refresh` method from there.
You have many ways to interact with refreshing.

Example:
```python title="refresh from model"
import pytabular as p
model = p.Tabular(CONNECTION_STR)
model.refresh('Table Name')
```

```python title="refresh from PyTables"
model.Tables.find("fact").refresh() # (1)
```

1. Refresh all tables with 'fact' in the name.

```python title="refresh from PyTable"
model.Tables['Sales'].refresh()
```

```python title="refresh from PyPartitions"
model.Tables['Large Sales Fact'].Partitions.refresh()
```

```python title="refresh from PyPartition"
model.Tables['Sales'].Partitions['Last Fiscal Year'].refresh()
```
"""
from tabular_tracing import RefreshTrace, BaseTrace
import logging
Expand Down
84 changes: 62 additions & 22 deletions pytabular/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,26 @@
class PyTable(PyObject):
"""The main PyTable class to interact with the tables in model.

Notice the `PyObject` magic method `__getattr__()` will search in `self._object`
if it is unable to find it in the default attributes.
This lets you also easily check the default .Net properties.
Attributes:
Name (str): Name of table.
IsHidden (bool): Is the table hidden.
Description (str): The description of the table.
Model (Tabular): The parent `Tabular()` class.
Partitions (PyPartitions): The `PyPartitions()` in the table.
Columns (PyColumns): The `PyColumns()` in the table.
Measures (PyMeasures): The `PyMeasures()` in the table.

Example:
```python title="Passing through PyTable to PyPartition"

model.Tables[0].Partitions['Last Year'].refresh() # (1)
```

1. This shows the ability to travel through your model
to a specific partition and then run a refresh
for that specific partition.
`model` -> `PyTables` -> `PyTable` (1st index) -> `PyPartitions`
-> `PyPartition` (.Name == 'Last Year') -> `.refresh()`
"""

def __init__(self, object, model) -> None:
Expand All @@ -34,16 +51,16 @@ def __init__(self, object, model) -> None:
"""
super().__init__(object)
self.Model = model
self.Partitions = PyPartitions(
self.Partitions: PyPartitions = PyPartitions(
[
PyPartition(partition, self)
for partition in self._object.Partitions.GetEnumerator()
]
)
self.Columns = PyColumns(
self.Columns: PyColumns = PyColumns(
[PyColumn(column, self) for column in self._object.Columns.GetEnumerator()]
)
self.Measures = PyMeasures(
self.Measures: PyMeasures = PyMeasures(
[
PyMeasure(measure, self)
for measure in self._object.Measures.GetEnumerator()
Expand Down Expand Up @@ -72,16 +89,28 @@ def row_count(self) -> int:

Returns:
int: Number of rows using `COUNTROWS`.

Example:
```python
model.Tables['Table Name'].row_count()
```
"""
return self.Model.Adomd.query(f"EVALUATE {{COUNTROWS('{self.Name}')}}")

def refresh(self, *args, **kwargs) -> pd.DataFrame:
"""Use this to refresh the PyTable in question.
"""Use this to refresh the PyTable.

You can pass through any extra parameters. For example:
`Tabular().Tables['Table Name'].Refresh(trace = None)`
Returns:
pd.DataFrame: Returns pandas dataframe with some refresh details.

Example:
```python
model.Tables['Table Name'].refresh()

model.Tables['Table Name'].refresh(trace = None) # (1)
```

1. You can pass through arguments to `PyRefresh`, like removing trace.
"""
return self.Model.refresh(self, *args, **kwargs)

Expand All @@ -106,11 +135,8 @@ def related(self):
class PyTables(PyObjects):
"""Groups together multiple tables.

See `PyObjects` class for what more it can do.
You can interact with `PyTables` straight from model. For ex: `model.Tables`.
You can even filter down with `.Find()`.
For example find all tables with `fact` in name.
`model.Tables.Find('fact')`.
You can interact with `PyTables` straight from model.
You can even filter down with `.find()`.
"""

def __init__(self, objects) -> None:
Expand All @@ -135,6 +161,16 @@ def query_all(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame:

Returns:
pd.DataFrame: Returns dataframe with results

Example:
```python
model.Tables.find('fact').query_all() # (1)
```

1. Because `.find()` will return the `PyObjects` you are searching in,
another `PyTables` is returned, but reduced to just
the `PyTable`(s) with 'fact' in the name. It then
gets the number of rows for each table.
"""
logger.info("Querying every table in PyTables...")
logger.debug(f"Function to be run: {query_function}")
Expand All @@ -148,8 +184,12 @@ def query_all(self, query_function: str = "COUNTROWS(_)") -> pd.DataFrame:
query_str = f"{query_str[:-2]})"
return self[0].Model.query(query_str)

def find_zero_rows(self):
"""Returns PyTables class of tables with zero rows queried."""
def find_zero_rows(self) -> "PyTables":
"""Returns PyTables class of tables with zero rows queried.

Returns:
PyTables: A subset of the `PyTables` that contains zero rows.
"""
query_function: str = "COUNTROWS(_)"
df = self.query_all(query_function)

Expand All @@ -168,14 +208,14 @@ def last_refresh(self, group_partition: bool = True) -> pd.DataFrame:
`model.Create_Table(p.Table_Last_Refresh_Times(model),'RefreshTimes')`.

Args:
group_partition (bool, optional): Whether or not you want
the grain of the dataframe to be by table or by partition.
Defaults to True.
group_partition (bool, optional): Whether or not you want
the grain of the dataframe to be by table or by partition.
Defaults to True.

Returns:
pd.DataFrame: pd dataframe with the RefreshedTime property
If group_partition == True and the table has
multiple partitions, then df.groupby(by["tables"]).max()
pd.DataFrame: pd dataframe with the RefreshedTime property
If group_partition == True and the table has
multiple partitions, then df.groupby(by["tables"]).max()
"""
data = {
"Tables": [
Expand Down