From c02c4512d6883e399a09d667e5b216dbfea2902e Mon Sep 17 00:00:00 2001 From: Brent Bovenzi Date: Tue, 20 Feb 2024 22:53:02 -0500 Subject: [PATCH 1/5] Clean up webserver endpoints adding to audit log --- airflow/config_templates/config.yml | 2 +- airflow/www/views.py | 26 ++++---------------------- 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 4db178cd8f1fb..057998fb230a2 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1838,7 +1838,7 @@ webserver: version_added: 2.3.0 type: string example: ~ - default: "gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data" + default: "gantt,landing_times,tries,duration,calendar,graph,grid,tree" audit_view_included_events: description: | Comma separated string of view events to include in dag audit view. diff --git a/airflow/www/views.py b/airflow/www/views.py index 1e00e86423113..3561be4a5c43b 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1325,8 +1325,7 @@ def legacy_code(self): @expose("/dags//code") @auth.has_access_dag("GET", DagAccessEntity.CODE) - @provide_session - def code(self, dag_id, session: Session = NEW_SESSION): + def code(self, dag_id): """Dag Code.""" kwargs = { **sanitize_args(request.args), @@ -2491,7 +2490,7 @@ def dagrun_queued(self): @expose("/dagrun_details") def dagrun_details(self): - """Redirect to the GRID DAGRun page. This is avoids breaking links.""" + """Redirect to the Grid DagRun page. This is avoids breaking links.""" dag_id = request.args.get("dag_id") run_id = request.args.get("run_id") return redirect(url_for("Airflow.grid", dag_id=dag_id, dag_run_id=run_id)) @@ -2762,16 +2761,12 @@ def success(self): ) @expose("/dags/") - @gzipped - @action_logging def dag(self, dag_id): """Redirect to default DAG view.""" kwargs = {**sanitize_args(request.args), "dag_id": dag_id} return redirect(url_for("Airflow.grid", **kwargs)) @expose("/tree") - @gzipped - @action_logging def legacy_tree(self): """Redirect to the replacement - grid view. Kept for backwards compatibility.""" return redirect(url_for("Airflow.grid", **sanitize_args(request.args))) @@ -2838,8 +2833,6 @@ def grid(self, dag_id: str, session: Session = NEW_SESSION): ) @expose("/calendar") - @gzipped - @action_logging def legacy_calendar(self): """Redirect from url param.""" return redirect(url_for("Airflow.calendar", **sanitize_args(request.args))) @@ -2953,15 +2946,12 @@ def calendar(self, dag_id: str, session: Session = NEW_SESSION): ) @expose("/graph") - @gzipped - @action_logging def legacy_graph(self): """Redirect from url param.""" return redirect(url_for("Airflow.graph", **sanitize_args(request.args))) @expose("/dags//graph") @gzipped - @action_logging @provide_session def graph(self, dag_id: str, session: Session = NEW_SESSION): """Redirect to the replacement - grid + graph. Kept for backwards compatibility.""" @@ -2984,7 +2974,6 @@ def graph(self, dag_id: str, session: Session = NEW_SESSION): return redirect(url_for("Airflow.grid", **kwargs)) @expose("/duration") - @action_logging def legacy_duration(self): """Redirect from url param.""" return redirect(url_for("Airflow.duration", **sanitize_args(request.args))) @@ -3137,7 +3126,6 @@ def grouping_key(ti: TaskInstance): ) @expose("/tries") - @action_logging def legacy_tries(self): """Redirect from url param.""" return redirect(url_for("Airflow.tries", **sanitize_args(request.args))) @@ -3220,7 +3208,6 @@ def tries(self, dag_id: str, session: Session = NEW_SESSION): ) @expose("/landing_times") - @action_logging def legacy_landing_times(self): """Redirect from url param.""" return redirect(url_for("Airflow.landing_times", **sanitize_args(request.args))) @@ -3326,14 +3313,12 @@ def paused(self): return "OK" @expose("/gantt") - @action_logging def legacy_gantt(self): """Redirect from url param.""" return redirect(url_for("Airflow.gantt", **sanitize_args(request.args))) @expose("/dags//gantt") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def gantt(self, dag_id: str, session: Session = NEW_SESSION): """Redirect to the replacement - grid + gantt. Kept for backwards compatibility.""" @@ -3406,12 +3391,10 @@ def extra_links(self, *, session: Session = NEW_SESSION): @expose("/object/graph_data") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) @gzipped - @action_logging - @provide_session - def graph_data(self, session: Session = NEW_SESSION): + def graph_data(self): """Get Graph Data.""" dag_id = request.args.get("dag_id") - dag = get_airflow_app().dag_bag.get_dag(dag_id, session=session) + dag = get_airflow_app().dag_bag.get_dag(dag_id) root = request.args.get("root") if root: filter_upstream = request.args.get("filter_upstream") == "true" @@ -3435,7 +3418,6 @@ def graph_data(self, session: Session = NEW_SESSION): @expose("/object/task_instances") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging def task_instances(self): """Show task instances.""" dag_id = request.args.get("dag_id") From cde1d119542231a0f17590defce3e04bcdc71f3d Mon Sep 17 00:00:00 2001 From: Brent Bovenzi Date: Wed, 21 Feb 2024 15:50:31 -0500 Subject: [PATCH 2/5] Remove all view-only action logging and change excluded events list --- airflow/config_templates/config.yml | 4 ++-- airflow/www/views.py | 14 +------------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 057998fb230a2..41d0b56c9fc55 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1837,8 +1837,8 @@ webserver: The audit logs in the db will not be affected by this parameter. version_added: 2.3.0 type: string - example: ~ - default: "gantt,landing_times,tries,duration,calendar,graph,grid,tree" + example: "cli_task_run,running,success" + default: ~ audit_view_included_events: description: | Comma separated string of view events to include in dag audit view. diff --git a/airflow/www/views.py b/airflow/www/views.py index 3561be4a5c43b..8c00d291f20fa 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1347,7 +1347,6 @@ def dag_details(self, dag_id): @expose("/rendered-templates") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def rendered_templates(self, session): """Get rendered Dag.""" @@ -1466,7 +1465,6 @@ def rendered_templates(self, session): @expose("/rendered-k8s") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def rendered_k8s(self, *, session: Session = NEW_SESSION): """Get rendered k8s yaml.""" @@ -1532,7 +1530,6 @@ def rendered_k8s(self, *, session: Session = NEW_SESSION): @expose("/get_logs_with_metadata") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) @auth.has_access_dag("GET", DagAccessEntity.TASK_LOGS) - @action_logging @provide_session def get_logs_with_metadata(self, session: Session = NEW_SESSION): """Retrieve logs including metadata.""" @@ -1613,7 +1610,6 @@ def get_logs_with_metadata(self, session: Session = NEW_SESSION): @expose("/log") @auth.has_access_dag("GET", DagAccessEntity.TASK_LOGS) - @action_logging @provide_session def log(self, session: Session = NEW_SESSION): """Retrieve log.""" @@ -1658,7 +1654,6 @@ def log(self, session: Session = NEW_SESSION): @expose("/redirect_to_external_log") @auth.has_access_dag("GET", DagAccessEntity.TASK_LOGS) - @action_logging @provide_session def redirect_to_external_log(self, session: Session = NEW_SESSION): """Redirects to external log.""" @@ -1690,7 +1685,6 @@ def redirect_to_external_log(self, session: Session = NEW_SESSION): @expose("/task") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def task(self, session: Session = NEW_SESSION): """Retrieve task.""" @@ -1816,7 +1810,6 @@ def include_task_attrs(attr_name): @expose("/xcom") @auth.has_access_dag("GET", DagAccessEntity.XCOM) - @action_logging @provide_session def xcom(self, session: Session = NEW_SESSION): """Retrieve XCOM.""" @@ -2345,6 +2338,7 @@ def dagrun_clear(self, *, session: Session = NEW_SESSION): @expose("/blocked", methods=["POST"]) @auth.has_access_dag("GET", DagAccessEntity.RUN) + @action_logging @provide_session def blocked(self, session: Session = NEW_SESSION): """Mark Dag Blocked.""" @@ -2980,7 +2974,6 @@ def legacy_duration(self): @expose("/dags//duration") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def duration(self, dag_id: str, session: Session = NEW_SESSION): """Get Dag as duration graph.""" @@ -3132,7 +3125,6 @@ def legacy_tries(self): @expose("/dags//tries") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def tries(self, dag_id: str, session: Session = NEW_SESSION): """Show all tries.""" @@ -3214,7 +3206,6 @@ def legacy_landing_times(self): @expose("/dags//landing-times") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def landing_times(self, dag_id: str, session: Session = NEW_SESSION): """Show landing times.""" @@ -3334,7 +3325,6 @@ def gantt(self, dag_id: str, session: Session = NEW_SESSION): @expose("/extra_links") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) - @action_logging @provide_session def extra_links(self, *, session: Session = NEW_SESSION): """ @@ -3745,7 +3735,6 @@ def datasets_summary(self): ) @expose("/robots.txt") - @action_logging def robots(self): """ Return a robots.txt file for blocking certain search engine crawlers. @@ -5780,7 +5769,6 @@ class DagDependenciesView(AirflowBaseView): @expose("/dag-dependencies") @auth.has_access_dag("GET", DagAccessEntity.DEPENDENCIES) @gzipped - @action_logging def list(self): """Display DAG dependencies.""" title = "DAG Dependencies" From 63785800831fbe7da291b634005bdcd02323b711 Mon Sep 17 00:00:00 2001 From: Brent Bovenzi Date: Wed, 21 Feb 2024 18:48:17 -0500 Subject: [PATCH 3/5] Restore robots logging --- airflow/www/views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airflow/www/views.py b/airflow/www/views.py index 8c00d291f20fa..452d6eb2e7c26 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -3735,6 +3735,7 @@ def datasets_summary(self): ) @expose("/robots.txt") + @action_logging def robots(self): """ Return a robots.txt file for blocking certain search engine crawlers. From 154bc9d43e48ce64c11b2e3e2aac07f9176d94d5 Mon Sep 17 00:00:00 2001 From: Brent Bovenzi Date: Thu, 22 Feb 2024 12:10:56 -0500 Subject: [PATCH 4/5] Remove two extra loggings+tests, add test for robots.txt --- airflow/www/views.py | 2 -- tests/www/views/test_views_decorators.py | 36 ++++-------------------- 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 452d6eb2e7c26..5fa8542dfc556 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -2768,7 +2768,6 @@ def legacy_tree(self): @expose("/dags//grid") @auth.has_access_dag("GET", DagAccessEntity.TASK_INSTANCE) @gzipped - @action_logging @provide_session def grid(self, dag_id: str, session: Session = NEW_SESSION): """Get Dag's grid view.""" @@ -2834,7 +2833,6 @@ def legacy_calendar(self): @expose("/dags//calendar") @auth.has_access_dag("GET", DagAccessEntity.RUN) @gzipped - @action_logging @provide_session def calendar(self, dag_id: str, session: Session = NEW_SESSION): """Get DAG runs as calendar.""" diff --git a/tests/www/views/test_views_decorators.py b/tests/www/views/test_views_decorators.py index 00e657a2d7db5..4cfab0926ec31 100644 --- a/tests/www/views/test_views_decorators.py +++ b/tests/www/views/test_views_decorators.py @@ -17,8 +17,6 @@ # under the License. from __future__ import annotations -import urllib.parse - import pytest from airflow.models import DagBag, Variable @@ -92,39 +90,17 @@ def clean_db(): clear_db_variables() -def test_action_logging_get(session, admin_client): - url = ( - f"dags/example_bash_operator/grid?" - f"execution_date={urllib.parse.quote_plus(str(EXAMPLE_DAG_DEFAULT_DATE))}" - ) - resp = admin_client.get(url, follow_redirects=True) - check_content_in_response("success", resp) +def test_action_logging_robots(session, admin_client): + url = "/robots.txt" + admin_client.get(url, follow_redirects=True) # In mysql backend, this commit() is needed to write down the logs session.commit() _check_last_log( session, - dag_id="example_bash_operator", - event="grid", - execution_date=EXAMPLE_DAG_DEFAULT_DATE, - ) - - -def test_action_logging_get_legacy_view(session, admin_client): - url = ( - f"tree?dag_id=example_bash_operator&" - f"execution_date={urllib.parse.quote_plus(str(EXAMPLE_DAG_DEFAULT_DATE))}" - ) - resp = admin_client.get(url, follow_redirects=True) - check_content_in_response("success", resp) - - # In mysql backend, this commit() is needed to write down the logs - session.commit() - _check_last_log( - session, - dag_id="example_bash_operator", - event="legacy_tree", - execution_date=EXAMPLE_DAG_DEFAULT_DATE, + event="robots", + dag_id=None, + execution_date=None, ) From 26642518a23ddd5466de03923c9abccb4cf9b3d2 Mon Sep 17 00:00:00 2001 From: Brent Bovenzi Date: Thu, 22 Feb 2024 22:31:06 -0500 Subject: [PATCH 5/5] Fix rendered template test --- airflow/www/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 5fa8542dfc556..9b19f26fc60f9 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -1360,7 +1360,7 @@ def rendered_templates(self, session): logging.info("Retrieving rendered templates.") dag: DAG = get_airflow_app().dag_bag.get_dag(dag_id) - dag_run = dag.get_dagrun(execution_date=dttm, session=session) + dag_run = dag.get_dagrun(execution_date=dttm) raw_task = dag.get_task(task_id).prepare_for_execution() no_dagrun = False