Skip to content

Commit ea05023

Browse files
committed
feat(gooddata-pipelines): add ldm extension logic
1 parent 880b8de commit ea05023

20 files changed

+1948
-0
lines changed

gooddata-pipelines/gooddata_pipelines/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
from .backup_and_restore.storage.local_storage import LocalStorage
1414
from .backup_and_restore.storage.s3_storage import S3Storage
1515

16+
# -------- LDM Extension --------
17+
from .ldm_extension.ldm_extension_manager import LdmExtensionManager
18+
from .ldm_extension.models.custom_data_object import (
19+
ColumnDataType,
20+
CustomDatasetDefinition,
21+
CustomFieldDefinition,
22+
CustomFieldType,
23+
)
24+
1625
# -------- Provisioning --------
1726
from .provisioning.entities.user_data_filters.models.udf_models import (
1827
UserDataFilterFullLoad,
@@ -65,5 +74,10 @@
6574
"UserDataFilterProvisioner",
6675
"UserDataFilterFullLoad",
6776
"EntityType",
77+
"LdmExtensionManager",
78+
"CustomDatasetDefinition",
79+
"CustomFieldDefinition",
80+
"ColumnDataType",
81+
"CustomFieldType",
6882
"__version__",
6983
]

gooddata-pipelines/gooddata_pipelines/api/gooddata_api.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,44 @@ def get_automations(self, workspace_id: str) -> requests.Response:
174174
)
175175
return self._get(endpoint)
176176

177+
def get_all_metrics(self, workspace_id: str) -> requests.Response:
    """Fetch every metric entity defined in a workspace.

    Args:
        workspace_id (str): The ID of the workspace to retrieve metrics from.

    Returns:
        requests.Response: The raw API response containing the metrics.
    """
    # X-GDC-VALIDATE-RELATIONS makes the API report broken relations
    # on the returned entities.
    request_headers = dict(self.headers)
    request_headers["X-GDC-VALIDATE-RELATIONS"] = "true"
    return self._get(
        f"/entities/workspaces/{workspace_id}/metrics",
        headers=request_headers,
    )
188+
189+
def get_all_visualization_objects(
    self, workspace_id: str
) -> requests.Response:
    """Fetch every visualization object defined in a workspace.

    Args:
        workspace_id (str): The ID of the workspace to retrieve visualizations from.

    Returns:
        requests.Response: The raw API response containing the visualizations.
    """
    # X-GDC-VALIDATE-RELATIONS makes the API report broken relations
    # on the returned entities.
    request_headers = dict(self.headers)
    request_headers["X-GDC-VALIDATE-RELATIONS"] = "true"
    return self._get(
        f"/entities/workspaces/{workspace_id}/visualizationObjects",
        headers=request_headers,
    )
202+
203+
def get_all_dashboards(self, workspace_id: str) -> requests.Response:
    """Fetch every analytical dashboard defined in a workspace.

    Args:
        workspace_id (str): The ID of the workspace to retrieve dashboards from.

    Returns:
        requests.Response: The raw API response containing the dashboards.
    """
    # X-GDC-VALIDATE-RELATIONS makes the API report broken relations
    # on the returned entities.
    request_headers = dict(self.headers)
    request_headers["X-GDC-VALIDATE-RELATIONS"] = "true"
    return self._get(
        f"/entities/workspaces/{workspace_id}/analyticalDashboards",
        headers=request_headers,
    )
214+
177215
def _get(
178216
self, endpoint: str, headers: dict[str, str] | None = None
179217
) -> requests.Response:
@@ -253,3 +291,15 @@ def _delete(
253291
url = self._get_url(endpoint)
254292

255293
return requests.delete(url, headers=self.headers, timeout=TIMEOUT)
294+
295+
@staticmethod
def raise_if_response_not_ok(*responses: requests.Response) -> None:
    """Validate that every supplied API response succeeded.

    Args:
        *responses: Responses returned by previous API calls.

    Raises:
        ValueError: On the first response whose status code is not 2xx.
    """
    for resp in responses:
        if resp.ok:
            continue
        raise ValueError(
            f"Request to {resp.url} failed with status code {resp.status_code}: {resp.text}"
        )
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# (C) 2025 GoodData Corporation
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
# (C) 2025 GoodData Corporation
2+
"""Module for processing validated custom datasets and fields data.
3+
4+
This module is responsible for converting validated custom datasets and fields
5+
into objects defined in the GoodData Python SDK.
6+
"""
7+
8+
from gooddata_sdk.catalog.identifier import (
9+
CatalogDatasetWorkspaceDataFilterIdentifier,
10+
CatalogGrainIdentifier,
11+
CatalogReferenceIdentifier,
12+
)
13+
from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.data_filter_references import (
14+
CatalogDeclarativeWorkspaceDataFilterReferences,
15+
)
16+
from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.dataset.dataset import (
17+
CatalogDataSourceTableIdentifier,
18+
CatalogDeclarativeAttribute,
19+
CatalogDeclarativeDataset,
20+
CatalogDeclarativeDatasetSql,
21+
CatalogDeclarativeFact,
22+
CatalogDeclarativeReference,
23+
CatalogDeclarativeReferenceSource,
24+
CatalogDeclarativeWorkspaceDataFilterColumn,
25+
)
26+
from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.date_dataset.date_dataset import (
27+
CatalogDeclarativeDateDataset,
28+
CatalogGranularitiesFormatting,
29+
)
30+
from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import (
31+
CatalogDeclarativeLdm,
32+
CatalogDeclarativeModel,
33+
)
34+
35+
from gooddata_pipelines.ldm_extension.models.aliases import DatasetId
36+
from gooddata_pipelines.ldm_extension.models.custom_data_object import (
37+
ColumnDataType,
38+
CustomDataset,
39+
CustomFieldDefinition,
40+
CustomFieldType,
41+
)
42+
43+
44+
class LdmExtensionDataProcessor:
    """Create GoodData LDM from validated custom datasets and fields.

    Converts validated custom dataset/field definitions into the
    declarative Logical Data Model objects defined by the GoodData
    Python SDK.
    """

    # Granularities applied to every generated date dataset.
    DATE_GRANULARITIES: list[str] = [
        "MINUTE",
        "HOUR",
        "DAY",
        "WEEK",
        "MONTH",
        "QUARTER",
        "YEAR",
        "MINUTE_OF_HOUR",
        "HOUR_OF_DAY",
        "DAY_OF_WEEK",
        "DAY_OF_MONTH",
        "DAY_OF_YEAR",
        "WEEK_OF_YEAR",
        "MONTH_OF_YEAR",
        "QUARTER_OF_YEAR",
    ]

    @staticmethod
    def _attribute_from_field(
        dataset_name: str,
        custom_field: CustomFieldDefinition,
    ) -> CatalogDeclarativeAttribute:
        """Assign a declarative attribute from a custom field definition.

        Args:
            dataset_name: Name of the owning dataset; stored as a tag on
                the attribute so generated objects can be traced back.
            custom_field: Validated custom field definition.

        Returns:
            CatalogDeclarativeAttribute: Attribute object for the LDM.
        """
        return CatalogDeclarativeAttribute(
            id=custom_field.custom_field_id,
            title=custom_field.custom_field_name,
            source_column=custom_field.custom_field_source_column,
            labels=[],
            source_column_data_type=custom_field.custom_field_source_column_data_type.value,
            tags=[dataset_name],
        )

    @staticmethod
    def _fact_from_field(
        dataset_name: str,
        custom_field: CustomFieldDefinition,
    ) -> CatalogDeclarativeFact:
        """Assign a declarative fact from a custom field definition.

        Args:
            dataset_name: Name of the owning dataset; stored as a tag.
            custom_field: Validated custom field definition.

        Returns:
            CatalogDeclarativeFact: Fact object for the LDM.
        """
        return CatalogDeclarativeFact(
            id=custom_field.custom_field_id,
            title=custom_field.custom_field_name,
            source_column=custom_field.custom_field_source_column,
            source_column_data_type=custom_field.custom_field_source_column_data_type.value,
            tags=[dataset_name],
        )

    def _date_from_field(
        self,
        dataset_name: str,
        custom_field: CustomFieldDefinition,
    ) -> CatalogDeclarativeDateDataset:
        """Assign a declarative date dataset from a custom field definition.

        Args:
            dataset_name: Name of the owning dataset; stored as a tag.
            custom_field: Validated custom field definition.

        Returns:
            CatalogDeclarativeDateDataset: Date dataset with the full set
            of granularities from ``DATE_GRANULARITIES``.
        """

        return CatalogDeclarativeDateDataset(
            id=custom_field.custom_field_id,
            title=custom_field.custom_field_name,
            granularities_formatting=CatalogGranularitiesFormatting(
                title_base="",
                title_pattern="%titleBase - %granularityTitle",
            ),
            granularities=self.DATE_GRANULARITIES,
            tags=[dataset_name],
        )

    @staticmethod
    def _date_ref_from_field(
        custom_field: CustomFieldDefinition,
    ) -> CatalogDeclarativeReference:
        """Create a date reference from a custom field definition.

        The reference links the owning dataset's source column to the
        separately created date dataset with the same id.

        Args:
            custom_field: Validated custom field definition of type DATE.

        Returns:
            CatalogDeclarativeReference: Reference pointing at the date
            dataset created by ``_date_from_field``.
        """
        return CatalogDeclarativeReference(
            identifier=CatalogReferenceIdentifier(
                id=custom_field.custom_field_id
            ),
            multivalue=False,
            sources=[
                CatalogDeclarativeReferenceSource(
                    column=custom_field.custom_field_source_column,
                    target=CatalogGrainIdentifier(
                        id=custom_field.custom_field_id,
                        type=CustomFieldType.DATE.value,
                    ),
                    data_type=custom_field.custom_field_source_column_data_type.value,
                )
            ],
        )

    @staticmethod
    def _get_sources(
        dataset: CustomDataset,
    ) -> tuple[
        CatalogDataSourceTableIdentifier | None,
        CatalogDeclarativeDatasetSql | None,
    ]:
        """Get the data source table and SQL from the dataset definition.

        Args:
            dataset: Validated custom dataset.

        Returns:
            Tuple of (table identifier or None, SQL definition or None).
            At least one element is expected to be non-None — see note
            below.
        """
        # We will have either a table id or a sql statement. Let's store
        # whatever data is available to variables and pass it to the
        # dataset. Both can be object instances or None, but at least one
        # should be valid as per prior validation.
        dataset_source_table_id = (
            CatalogDataSourceTableIdentifier(
                id=dataset.definition.dataset_source_table,
                data_source_id=dataset.definition.dataset_datasource_id,
                path=[dataset.definition.dataset_source_table],
            )
            if dataset.definition.dataset_source_table
            else None
        )

        dataset_sql = (
            CatalogDeclarativeDatasetSql(
                statement=dataset.definition.dataset_source_sql,
                data_source_id=dataset.definition.dataset_datasource_id,
            )
            if dataset.definition.dataset_source_sql
            else None
        )
        return dataset_source_table_id, dataset_sql

    def datasets_to_ldm(
        self, datasets: dict[DatasetId, CustomDataset]
    ) -> CatalogDeclarativeModel:
        """Convert validated datasets to GoodData declarative model.

        Args:
            datasets (dict[DatasetId, CustomDataset]): Dictionary of validated
                datasets.
        Returns:
            CatalogDeclarativeModel: GoodData declarative model representation
                of the datasets.
        Raises:
            ValueError: If a custom field has a type other than
                ATTRIBUTE, FACT, or DATE.
        """

        declarative_datasets: list[CatalogDeclarativeDataset] = []

        # Date dimensions are not stored in a dataset, but as a separate datasets
        # in `date_instances` object on the LDM
        date_instances: list[CatalogDeclarativeDateDataset] = []

        for dataset in datasets.values():
            date_references: list[CatalogDeclarativeReference] = []
            attributes: list[CatalogDeclarativeAttribute] = []
            facts: list[CatalogDeclarativeFact] = []

            # Iterate through the custom fields and create the appropriate objects
            for custom_field in dataset.custom_fields:
                if custom_field.custom_field_type == CustomFieldType.ATTRIBUTE:
                    attributes.append(
                        self._attribute_from_field(
                            dataset.definition.dataset_name, custom_field
                        )
                    )

                elif custom_field.custom_field_type == CustomFieldType.FACT:
                    facts.append(
                        self._fact_from_field(
                            dataset.definition.dataset_name, custom_field
                        )
                    )

                # Process date dimensions and store them to date_instances. Date
                # dimensions are not stored in a dataset, but as a separate dataset.
                # However, they need to be referenced in the dataset references to
                # create the connection between the dataset and the date dimension
                # in the GoodData Logical Data Model.
                elif custom_field.custom_field_type == CustomFieldType.DATE:
                    # Add the date dimension to the date_instances
                    date_instances.append(
                        self._date_from_field(
                            dataset.definition.dataset_name, custom_field
                        )
                    )

                    # Create a reference so that the date dimension is connected
                    # to the dataset in the GoodData Logical Data Model.
                    date_references.append(
                        self._date_ref_from_field(custom_field)
                    )

                else:
                    raise ValueError(
                        f"Unsupported custom field type: {custom_field.custom_field_type}"
                    )

            # Get the data source info
            dataset_source_table_id, dataset_sql = self._get_sources(dataset)

            # Construct the declarative dataset object and append it to the list.
            # The first reference always points at the parent dataset; any date
            # references collected above are appended after it.
            declarative_datasets.append(
                CatalogDeclarativeDataset(
                    id=dataset.definition.dataset_id,
                    title=dataset.definition.dataset_name,
                    grain=[],
                    references=[
                        CatalogDeclarativeReference(
                            identifier=CatalogReferenceIdentifier(
                                id=dataset.definition.parent_dataset_reference,
                            ),
                            multivalue=True,
                            sources=[
                                CatalogDeclarativeReferenceSource(
                                    column=dataset.definition.dataset_reference_source_column,
                                    data_type=dataset.definition.dataset_reference_source_column_data_type.value,
                                    target=CatalogGrainIdentifier(
                                        id=dataset.definition.parent_dataset_reference_attribute_id,
                                        type=CustomFieldType.ATTRIBUTE.value,
                                    ),
                                )
                            ],
                        ),
                    ]
                    + date_references,
                    description=None,
                    attributes=attributes,
                    facts=facts,
                    data_source_table_id=dataset_source_table_id,
                    sql=dataset_sql,
                    workspace_data_filter_columns=[
                        CatalogDeclarativeWorkspaceDataFilterColumn(
                            name=dataset.definition.workspace_data_filter_column_name,
                            data_type=ColumnDataType.STRING.value,
                        )
                    ],
                    workspace_data_filter_references=[
                        CatalogDeclarativeWorkspaceDataFilterReferences(
                            filter_id=CatalogDatasetWorkspaceDataFilterIdentifier(
                                id=dataset.definition.workspace_data_filter_id
                            ),
                            filter_column=dataset.definition.workspace_data_filter_column_name,
                            filter_column_data_type=ColumnDataType.STRING.value,
                        )
                    ],
                    tags=[dataset.definition.dataset_name],
                )
            )

        # Create the Logical Data Model from the datasets and the date instances.
        ldm = CatalogDeclarativeLdm(
            datasets=declarative_datasets, date_instances=date_instances
        )
        return CatalogDeclarativeModel(ldm=ldm)

0 commit comments

Comments
 (0)