|
| 1 | +from typing import Dict, Any |
1 | 2 | from datasource.bigquery.config import BigQueryConfig |
2 | 3 | from datasource.redshift.config import RedshiftConfig |
3 | 4 | from dependency_injector.wiring import inject |
|
33 | 34 | from plugins_module.plugins_container import PluginsContainer |
34 | 35 | from user_management_module.user_container import UserContainer |
35 | 36 | from user_management_module.services.user_service import UserService |
| 37 | +from flo_cloud.cloud_storage import CloudStorageManager |
36 | 38 | from fastapi import HTTPException |
37 | 39 | from user_management_module.utils.user_utils import get_current_user |
38 | 40 | from plugins_module.services.dynamic_query_service import DynamicQueryService |
39 | 41 | from db_repo_module.cache.cache_manager import CacheManager |
40 | | -from ..utils.helper import generate_cache_key, validate_yaml_query |
| 42 | +from ..utils.helper import ( |
| 43 | + generate_cache_key, |
| 44 | + generate_export_filename_hash, |
| 45 | + validate_yaml_query, |
| 46 | +) |
| 47 | +import csv |
| 48 | +import io |
41 | 49 | import yaml |
42 | 50 | from ..utils.helper import DynamicQueryRequest |
43 | 51 | from ..utils.helper import DynamicQueryExecuteRequest |
|
47 | 55 | datasource_router = APIRouter() |
48 | 56 |
|
49 | 57 |
|
| 58 | +def _serialized_rows_to_csv(rows: list) -> bytes: |
| 59 | + """Convert a list of serialized dicts (e.g. from execute_dynamic_query) to CSV bytes.""" |
| 60 | + if not rows: |
| 61 | + return b'' |
| 62 | + out = io.StringIO() |
| 63 | + fieldnames = list(rows[0].keys()) |
| 64 | + for row in rows[1:]: |
| 65 | + for k in row: |
| 66 | + if k not in fieldnames: |
| 67 | + fieldnames.append(k) |
| 68 | + writer = csv.DictWriter(out, fieldnames=fieldnames, extrasaction='ignore') |
| 69 | + |
| 70 | + def _cell_value(v): |
| 71 | + if isinstance(v, (dict, list)): |
| 72 | + return json.dumps(v) |
| 73 | + return v if v is None or isinstance(v, str) else str(v) |
| 74 | + |
| 75 | + writer.writeheader() |
| 76 | + for row in rows: |
| 77 | + writer.writerow({k: _cell_value(row.get(k)) for k in fieldnames}) |
| 78 | + return out.getvalue().encode('utf-8-sig') |
| 79 | + |
| 80 | + |
50 | 81 | @datasource_router.post('/v1/datasources') |
51 | 82 | @inject |
52 | 83 | async def add_datasource( |
@@ -718,6 +749,188 @@ async def execute_dynamic_query( |
718 | 749 | ) |
719 | 750 |
|
720 | 751 |
|
EXPORT_RATE_LIMIT_SECONDS = 120  # 2 minutes between exports per user


@datasource_router.post('/v1/{datasource_id}/dynamic-queries/{query_id}/export')
@inject
async def export_dynamic_query_csv(
    request: Request,
    datasource_id: str,
    query_id: str,
    filter: str | None = Query(None, alias='$filter'),
    offset: int | None = 0,
    limit: int | None = 100,
    dynamic_query_params: DynamicQueryExecuteRequest = None,
    response_formatter: ResponseFormatter = Depends(
        Provide[CommonContainer.response_formatter]
    ),
    dynamic_query_yaml_service: DynamicQueryService = Depends(
        Provide[PluginsContainer.dynamic_query_service]
    ),
    user_service: UserService = Depends(Provide[UserContainer.user_service]),
    cloud_manager: CloudStorageManager = Depends(
        Provide[PluginsContainer.cloud_manager]
    ),
    config: dict = Depends(Provide[PluginsContainer.config]),
    cache_manager: CacheManager = Depends(Provide[PluginsContainer.cache_manager]),
    force_fetch: int = Query(0),
):
    """Execute the dynamic query and upload the result set as a CSV file.

    Returns a presigned download URL for the CSV in the asset bucket.
    Exports are cached under an object key derived from a hash of every
    input that affects the result set ($filter, limit, offset, params, and
    the caller's RLS filter); pass ``force_fetch=1`` to re-run the query
    and overwrite any cached export.

    Responses:
        200 -> {'export_url': <presigned URL>}
        403 -> non-admin user without data-access filters configured
        404 -> unknown datasource or dynamic query
        429 -> another export by this user within the rate-limit window
        500 -> unexpected result shape from the datasource plugin
    """
    role_id, user_id, _ = get_current_user(request)

    # Rate limit: one export per user per window, enforced with a
    # set-if-not-exists (nx) cache entry that expires after the window.
    # NOTE(review): the slot is consumed even when the request fails
    # validation below -- consider releasing the key on error paths if
    # that proves too strict.
    export_rate_key = f'dynamic_query_export_rate:{user_id}'
    if not cache_manager.add(
        export_rate_key, '1', expiry=EXPORT_RATE_LIMIT_SECONDS, nx=True
    ):
        return JSONResponse(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            content=response_formatter.buildErrorResponse(
                f'Export rate limit: one export per user every {EXPORT_RATE_LIMIT_SECONDS // 60} minutes. Please try again later.'
            ),
        )

    datasource_type, datasource_config = await get_datasource_config(datasource_id)
    if not datasource_config:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=response_formatter.buildErrorResponse(
                f'Datasource not found: {datasource_id}'
            ),
        )

    yaml_query, _ = await dynamic_query_yaml_service.get_dynamic_yaml_query(query_id)
    if not yaml_query:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content=response_formatter.buildErrorResponse(
                f'Dynamic query not found: {query_id}'
            ),
        )

    # Row-level security: non-admins must have data filters configured;
    # they are joined with ' $and ' and pushed into the query execution.
    rls_filter_str = None
    is_admin = await check_admin(role_id)
    if not is_admin:
        rls_filters = await user_service.get_user_resources(
            user_id=user_id, scope=ResourceScope.DATA
        )
        if len(rls_filters) == 0:
            return JSONResponse(
                status_code=status.HTTP_403_FORBIDDEN,
                content=response_formatter.buildErrorResponse(
                    'Data access not set for non-admin user'
                ),
            )
        rls_filters = fetch_data_filters(rls_filters)
        rls_filter_str = ' $and '.join(rls_filters)

    # Export file identity: the hash includes the RLS filter so users with
    # different data access never share a cached file.
    provider = config['cloud_config']['cloud_provider']
    bucket_name = (
        config['aws']['aws_asset_storage_bucket']
        if provider == 'aws'
        else config['gcp']['gcp_asset_storage_bucket']
    )
    export_hash = generate_export_filename_hash(
        filter=filter,
        limit=limit,
        offset=offset,
        params=dynamic_query_params.params if dynamic_query_params else None,
        rls_filter_str=rls_filter_str,
    )
    filename = f'export_{query_id}_{export_hash}.csv'
    # BUG FIX: the object key previously interpolated a literal placeholder
    # instead of the generated filename, so every export collided on a
    # single key and the hash-based caching below never worked.
    file_key = f'dynamic_query_exports/{filename}'

    # Serve a previously generated export when present, unless force_fetch.
    if not force_fetch:
        existing_keys, _ = cloud_manager.list_files(
            bucket_name=bucket_name,
            prefix=file_key,
            page_size=1,
            page_number=1,
        )
        if existing_keys and existing_keys[0] == file_key:
            signed_url = cloud_manager.generate_presigned_url(
                bucket_name=bucket_name, key=file_key, type='GET'
            )
            return JSONResponse(
                status_code=status.HTTP_200_OK,
                content=response_formatter.buildSuccessResponse(
                    {'export_url': signed_url}
                ),
            )

    datasource_plugin = DatasourcePlugin(datasource_type, datasource_config)
    res: Dict[str, Any] = await datasource_plugin.execute_dynamic_query(
        yaml_query,
        rls_filter_str,
        filter,
        offset,
        limit,
        dynamic_query_params.params if dynamic_query_params else None,
    )

    # Expected plugin shape: {<query name>: {'status': ..., 'result': [...]}}.
    if not res:
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content=response_formatter.buildErrorResponse(
                f'Unexpected dynamic query result format for query_id {query_id}, no results'
            ),
        )

    first_key = next(iter(res))
    if res[first_key].get('status') != 'success':
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content=response_formatter.buildErrorResponse(
                f'Unexpected dynamic query result format for query_id {query_id}, no results'
            ),
        )

    serialized_res = serialize_values(res[first_key]['result'])

    rows = serialized_res or []
    if not isinstance(rows, list):
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content=response_formatter.buildErrorResponse(
                f'Unexpected dynamic query result format for query_id {query_id}, invalid rows'
            ),
        )

    # CSV header = union of keys across all rows, ordered by first appearance.
    fieldnames: list = []
    for row in rows:
        for key in row:
            if key not in fieldnames:
                fieldnames.append(key)

    def _cell_value(v):
        # JSON-encode nested structures; stringify other non-text scalars.
        # NOTE(review): relies on a module-level `import json` that is not
        # visible in this chunk -- confirm it exists at the top of the file.
        if isinstance(v, (dict, list)):
            return json.dumps(v)
        return v if v is None or isinstance(v, str) else str(v)

    # Stream rows straight to object storage to avoid holding the full CSV
    # in memory. An empty result set produces an empty (headerless) object.
    with cloud_manager.open_text_writer(
        bucket_name=bucket_name, key=file_key, content_type='text/csv'
    ) as f:
        if fieldnames:
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
            writer.writeheader()
            for row in rows:
                writer.writerow({k: _cell_value(row.get(k)) for k in fieldnames})

    signed_url = cloud_manager.generate_presigned_url(
        bucket_name=bucket_name, key=file_key, type='GET'
    )

    return JSONResponse(
        status_code=status.HTTP_200_OK,
        content=response_formatter.buildSuccessResponse({'export_url': signed_url}),
    )
| 933 | + |
721 | 934 | @datasource_router.delete('/v1/{datasource_id}/dynamic-queries/{query_id}') |
722 | 935 | @inject |
723 | 936 | async def delete_dynamic_query( |
|
0 commit comments