From 3950e307bc6692f882e348492d1d12c1dc4d769e Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sun, 9 Jun 2024 09:26:20 +0200
Subject: [PATCH 1/5] Redesign the scaling tasks guide.

---
 docs/source/how_to_guides/bp_scaling_tasks.md | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_scaling_tasks.md
index fa7cb5e9b..0c13bf759 100644
--- a/docs/source/how_to_guides/bp_scaling_tasks.md
+++ b/docs/source/how_to_guides/bp_scaling_tasks.md
@@ -1,14 +1,27 @@
 # Scaling tasks
 
-In any bigger project you quickly come to the point where you stack multiple repetitions
-of tasks on top of each other.
+In many projects, tasks are repeated across multiple dimensions that are stacked on top
+of each other.
 
-For example, you have one dataset, four different ways to prepare it, and three
-statistical models to analyze the data. The cartesian product of all steps combined
-comprises twelve differently fitted models.
+For example, take a project that there are four ways to simulate data and there are
+three different models that should be fitted on each dataset.
 
-Here you find some tips on how to set up your tasks such that you can easily modify the
-cartesian product of steps.
+Assuming there is a high-level interface to simulate data, we can loop over the task for
+simulating data four times with different arguments.
+
+Assuming there is a high-level interface to fit models to data,
+
+Assuming that you can easily switch the model the model fitting can be done in a taskThe
+cartesian product of all steps combined comprises twelve differently fitted models.
+
+This guide shows an approach to organizing your tasks that can be best described as
+flattening the loops.
+
+## The data catalog
+
+First of all, we need to create a data catalog in a `config.py` in your project.
+
+The data catalog plays a key role in managing lots of repetitions of tasks because it
 
 ## Scalability
 
@@ -18,7 +31,7 @@ different models to each specification.
 
 This is the structure of the project.
 
-```
+```text
 my_project
 ├───pyproject.toml
 │

From e143a4d03dc218c1ecf81b63c79f8d2d45f9a8bf Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sun, 9 Jun 2024 09:28:16 +0200
Subject: [PATCH 2/5] Fix.

---
 docs/source/changes.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/source/changes.md b/docs/source/changes.md
index 866452ea1..2a521c5a5 100644
--- a/docs/source/changes.md
+++ b/docs/source/changes.md
@@ -5,6 +5,10 @@ chronological order. Releases follow [semantic versioning](https://semver.org/)
 releases are available on [PyPI](https://pypi.org/project/pytask) and
 [Anaconda.org](https://anaconda.org/conda-forge/pytask).
 
+## 0.5.1 - 2024-xx-xx
+
+- {pull}`616` redesigns the guide on "Scaling Tasks".
+
 ## 0.5.0 - 2024-05-26
 
 - {pull}`548` fixes the type hints for {meth}`~pytask.Task.execute` and

From d34550f89b9b51e549af5ccd8dd7919010ec89b5 Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Mon, 24 Jun 2024 15:27:57 +0200
Subject: [PATCH 3/5] Fix.

---
 docs/source/how_to_guides/bp_scaling_tasks.md | 93 +++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_scaling_tasks.md
index 0c13bf759..e1a3d2772 100644
--- a/docs/source/how_to_guides/bp_scaling_tasks.md
+++ b/docs/source/how_to_guides/bp_scaling_tasks.md
@@ -1,5 +1,98 @@
 # Scaling tasks
 
+- \[ \] Write about adding another dimension.
+- \[ \] Write about adding another level.
+- \[ \] Write about executing subsets of tasks.
+- \[ \] Write about grouping by one dimension´or aggregating.
+
+In projects where task inputs and outputs are sufficiently standardized, it is possible
+to make extensive use of task repetition.
+
+A common pattern is to write multiple loops around a task function where each loop
+stands for a different dimension. A dimension, for example, represents different
+datasets or model specifications to analyze the datasets.
+
+There is nothing wrong with using nested loops for simpler projects that are clearly
+defined in scope. But, often they are just the start of looking at a problem from
+different angles and soon you want to add more dimensions.
+
+Adding another loop in a lot of places in your project is cumbersome and the increased
+indentation is visually displeasing.
+
+It is not the most serious problem, though. More importantly, it becomes cumbersome to
+reference dependencies of products and to set unique identifiers for tasks. The latter
+is important to execute only subsets of the project.
+
+How do we solve these problems? Here is a brief explanation of the solution.
+
+1. Create objects to define every dimension in the project. A dimension can be
+   characterized by a single value like a {class}`~pathlib.Path`, an
+   {class}`~enum.Enum`, or a {class}`~typing.NamedTuple` or
+   {func}`~dataclasses.dataclass` if more fields are needed.
+
+1. Create an object like a {class}`~typing.NamedTuple` or a
+   {func}`~dataclasses.dataclass` that has one attribute for each dimension. For lack of
+   a better name, we will call this unit an experiment.
+
+   The experiment combines the information provided by each dimension to create a unique
+   identifier for each experiment and the names or paths of dependencies and products
+   for each task.
+
+To make the idea more tangible, let us focus on an example.
+
+## Example
+
+Let us assume we have a project with multiple datasets and model specifications that
+should be fitted to the data.
+
+The datasets are created by the task from the
+{doc}`tutorials <../tutorials/defining_dependencies_products>` parametrized with
+different coefficients.
+
+Below that is the task that fits different models to the datasets using a double loop.
+
+```python
+from pathlib import Path
+from pytask import task, Product
+
+
+SRC = Path(__file__).parent
+BLD = SRC / "bld"
+
+
+
+
+
+for dat
+
+
+
+for data_name in ("a", "b", "c"):
+    for model_name in ("ols", "logit", "linear_prob"):
+
+        @task
+        def task_fit_model(path_to_data: Path = SRC / f"{data_name}.pkl")
+
+```
+
+1. The level of indentation is not visually pleasing and does not allow us to
+   sufficiently use every line in the file.
+
+1. Whenever we add another dimension to our problem, we need to extend every occurrence
+   of the nested loops.
+
+But, these problems are more annoying than truly
+
+The first and most important problem is that
+
+The first problem is t
+
+There are couple of problems that arise in these projects.
+
+The main problem is that with
+
+In projects where task inputs and outputs can be standardized and general interface
+
 In many projects, tasks are repeated across multiple dimensions that are stacked on top
 of each other.
 

From 7175e83e9192bd172907c2c5eca425eda46fdecf Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Fri, 12 Jul 2024 16:08:10 +0200
Subject: [PATCH 4/5] Add progress.

---
 ...asks.md => bp_complex_task_repetitions.md} | 28 +++++++++++--------
 .../bp_structure_of_task_files.md             |  2 +-
 docs/source/how_to_guides/index.md            |  2 +-
 .../repeating_tasks_with_different_inputs.md  |  3 +-
 .../bp_complex_task_repetitions/example.py    | 19 +++++++++++++
 5 files changed, 40 insertions(+), 14 deletions(-)
 rename docs/source/how_to_guides/{bp_scaling_tasks.md => bp_complex_task_repetitions.md} (88%)
 create mode 100644 docs_src/how_to_guides/bp_complex_task_repetitions/example.py

diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_complex_task_repetitions.md
similarity index 88%
rename from docs/source/how_to_guides/bp_scaling_tasks.md
rename to docs/source/how_to_guides/bp_complex_task_repetitions.md
index e1a3d2772..6e7e16fff 100644
--- a/docs/source/how_to_guides/bp_scaling_tasks.md
+++ b/docs/source/how_to_guides/bp_complex_task_repetitions.md
@@ -1,29 +1,35 @@
-# Scaling tasks
+# Complex task repetitions
 
 - \[ \] Write about adding another dimension.
 - \[ \] Write about adding another level.
 - \[ \] Write about executing subsets of tasks.
-- \[ \] Write about grouping by one dimension´or aggregating.
+- \[ \] Write about grouping by one dimension or aggregating.
 
 In projects where task inputs and outputs are sufficiently standardized, it is possible
 to make extensive use of task repetition.
 
 A common pattern is to write multiple loops around a task function where each loop
-stands for a different dimension. A dimension, for example, represents different
-datasets or model specifications to analyze the datasets.
+stands for a different dimension. A dimension might represent different datasets or
+model specifications to analyze the datasets like in the following example.
+
+```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example.py
+```
 
 There is nothing wrong with using nested loops for simpler projects that are clearly
 defined in scope. But, often they are just the start of looking at a problem from
-different angles and soon you want to add more dimensions.
+different angles.
+
+For more complex projects, you are quickly running into a couple of problems.
 
-Adding another loop in a lot of places in your project is cumbersome and the increased
-indentation is visually displeasing.
+- You need to add the nested loops in a lot of places.
+- Every dimension adds another level of indentation which is not aesthetically pleasing.
+- Adding another dimension leads to a lot of changes in many places.
+- It becomes cumbersome to manage the unique ids of the repeated tasks.
 
-It is not the most serious problem, though. More importantly, it becomes cumbersome to
-reference dependencies of products and to set unique identifiers for tasks. The latter
-is important to execute only subsets of the project.
+The rest of the guide lays out a pattern that
 
-How do we solve these problems? Here is a brief explanation of the solution.
+To solve these problems, the pattern laid out in the rest of the article proved to be
+helpful.
 
 1. Create objects to define every dimension in the project. A dimension can be
    characterized by a single value like a {class}`~pathlib.Path`, an
diff --git a/docs/source/how_to_guides/bp_structure_of_task_files.md b/docs/source/how_to_guides/bp_structure_of_task_files.md
index 857f64792..84e16789f 100644
--- a/docs/source/how_to_guides/bp_structure_of_task_files.md
+++ b/docs/source/how_to_guides/bp_structure_of_task_files.md
@@ -14,7 +14,7 @@ are looking for orientation or inspiration, here are some tips.
   module is for.
 
   ```{seealso}
-  The only exception might be for {doc}`repetitions <bp_scaling_tasks>`.
+  The only exception might be for {doc}`repetitions <bp_complex_task_repetitions>`.
   ```
 
 - The purpose of the task function is to handle IO operations like loading and saving
diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md
index 8f0e9f47e..53068ee0f 100644
--- a/docs/source/how_to_guides/index.md
+++ b/docs/source/how_to_guides/index.md
@@ -42,5 +42,5 @@ maxdepth: 1
 bp_structure_of_a_research_project
 bp_structure_of_task_files
 bp_templates_and_projects
-bp_scaling_tasks
+bp_complex_task_repetitions
 ```
diff --git a/docs/source/tutorials/repeating_tasks_with_different_inputs.md b/docs/source/tutorials/repeating_tasks_with_different_inputs.md
index 750435d65..136152ed0 100644
--- a/docs/source/tutorials/repeating_tasks_with_different_inputs.md
+++ b/docs/source/tutorials/repeating_tasks_with_different_inputs.md
@@ -291,7 +291,8 @@ for id_, kwargs in ID_TO_KWARGS.items():
     def task_create_random_data(i, produces): ...
 ```
 
-The {doc}`best-practices guide on parametrizations <../how_to_guides/bp_scaling_tasks>`
+The
+{doc}`best-practices guide on parametrizations <../how_to_guides/bp_complex_task_repetitions>`
 goes into even more detail on how to scale parametrizations.
 
 ## A warning on globals
diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py
new file mode 100644
index 000000000..d0893d7a1
--- /dev/null
+++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+from typing import Annotated
+
+from pytask import Product
+from pytask import task
+
+SRC = Path(__file__).parent
+BLD = SRC / "bld"
+
+
+for data_name in ("a", "b", "c"):
+    for model_name in ("ols", "logit", "linear_prob"):
+
+        @task
+        def task_fit_model(
+            path_to_data: Path = SRC / f"{data_name}.pkl",
+            path_to_model: Annotated[Path, Product] = BLD
+            / f"{data_name}-{model_name}.pkl",
+        ) -> None: ...

From a828c217c5c50fe2ac2e959f5e386896a5ea4ce0 Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sun, 14 Jul 2024 16:35:22 +0200
Subject: [PATCH 5/5] Fix.

---
 .../bp_complex_task_repetitions.md            | 226 ++++--------------
 .../bp_complex_task_repetitions/example.py    |   2 +-
 .../example_improved.py                       |  14 ++
 .../bp_complex_task_repetitions/experiment.py |  37 +++
 docs_src/how_to_guides/bp_scaling_tasks_1.py  |  20 --
 docs_src/how_to_guides/bp_scaling_tasks_2.py  |  39 ---
 docs_src/how_to_guides/bp_scaling_tasks_3.py  |  18 --
 docs_src/how_to_guides/bp_scaling_tasks_4.py  |  36 ---
 8 files changed, 100 insertions(+), 292 deletions(-)
 create mode 100644 docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
 create mode 100644 docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py
 delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_1.py
 delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_2.py
 delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_3.py
 delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_4.py

diff --git a/docs/source/how_to_guides/bp_complex_task_repetitions.md b/docs/source/how_to_guides/bp_complex_task_repetitions.md
index 6e7e16fff..68e44569d 100644
--- a/docs/source/how_to_guides/bp_complex_task_repetitions.md
+++ b/docs/source/how_to_guides/bp_complex_task_repetitions.md
@@ -1,209 +1,79 @@
 # Complex task repetitions
 
-- \[ \] Write about adding another dimension.
-- \[ \] Write about adding another level.
-- \[ \] Write about executing subsets of tasks.
-- \[ \] Write about grouping by one dimension or aggregating.
+{doc}`Task repetitions <../tutorials/repeating_tasks_with_different_inputs>` are amazing
+if you want to execute lots of tasks while not repeating yourself in code.
 
-In projects where task inputs and outputs are sufficiently standardized, it is possible
-to make extensive use of task repetition.
+But, in any bigger project, repetitions can become hard to maintain because there are
+multiple layers or dimensions of repetition.
 
-A common pattern is to write multiple loops around a task function where each loop
-stands for a different dimension. A dimension might represent different datasets or
-model specifications to analyze the datasets like in the following example.
-
-```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example.py
-```
-
-There is nothing wrong with using nested loops for simpler projects that are clearly
-defined in scope. But, often they are just the start of looking at a problem from
-different angles.
-
-For more complex projects, you are quickly running into a couple of problems.
-
-- You need to add the nested loops in a lot of places.
-- Every dimension adds another level of indentation which is not aesthetically pleasing.
-- Adding another dimension leads to a lot of changes in many places.
-- It becomes cumbersome to manage the unique ids of the repeated tasks.
-
-The rest of the guide lays out a pattern that
-
-To solve these problems, the pattern laid out in the rest of the article proved to be
-helpful.
-
-1. Create objects to define every dimension in the project. A dimension can be
-   characterized by a single value like a {class}`~pathlib.Path`, an
-   {class}`~enum.Enum`, or a {class}`~typing.NamedTuple` or
-   {func}`~dataclasses.dataclass` if more fields are needed.
-
-1. Create an object like a {class}`~typing.NamedTuple` or a
-   {func}`~dataclasses.dataclass` that has one attribute for each dimension. For lack of
-   a better name, we will call this unit an experiment.
-
-   The experiment combines the information provided by each dimension to create a unique
-   identifier for each experiment and the names or paths of dependencies and products
-   for each task.
-
-To make the idea more tangible, let us focus on an example.
+Here you find some tips on how to set up your project such that adding new dimensions
+and extending existing ones becomes much easier.
 
 ## Example
 
-Let us assume we have a project with multiple datasets and model specifications that
-should be fitted to the data.
-
-The datasets are created by the task from the
-{doc}`tutorials <../tutorials/defining_dependencies_products>` parametrized with
-different coefficients.
-
-Below that is the task that fits different models to the datasets using a double loop.
-
-```python
-from pathlib import Path
-from pytask import task, Product
-
-
-SRC = Path(__file__).parent
-BLD = SRC / "bld"
-
-
-
-
-
-for dat
-
-
-
-for data_name in ("a", "b", "c"):
-    for model_name in ("ols", "logit", "linear_prob"):
-
-        @task
-        def task_fit_model(path_to_data: Path = SRC / f"{data_name}.pkl")
+You can write multiple loops around a task function where each loop stands for a
+different dimension. A dimension might represent different datasets or model
+specifications to analyze the datasets like in the following example. The task arguments
+are derived from the dimensions.
 
+```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example.py
+---
+caption: task_example.py
+---
 ```
 
-1. The level of indentation is not visually pleasing and does not allow us to
-   sufficiently use every line in the file.
-
-1. Whenever we add another dimension to our problem, we need to extend every occurrence
-   of the nested loops.
-
-But, these problems are more annoying than truly
-
-The first and most important problem is that
-
-The first problem is t
-
-There are couple of problems that arise in these projects.
-
-The main problem is that with
-
-In projects where task inputs and outputs can be standardized and general interface
-
-In many projects, tasks are repeated across multiple dimensions that are stacked on top
-of each other.
-
-For example, take a project that there are four ways to simulate data and there are
-three different models that should be fitted on each dataset.
-
-Assuming there is a high-level interface to simulate data, we can loop over the task for
-simulating data four times with different arguments.
-
-Assuming there is a high-level interface to fit models to data,
-
-Assuming that you can easily switch the model the model fitting can be done in a taskThe
-cartesian product of all steps combined comprises twelve differently fitted models.
+There is nothing wrong with using nested loops for simpler projects. But projects often
+grow over time, and then you run into the following problems.
 
-This guide shows an approach to organizing your tasks that can be best described as
-flattening the loops.
+- When you add a new task, you need to duplicate the nested loops in another module.
+- When you add a dimension, you need to touch multiple files in your project and add
+  another loop and level of indentation.
 
-## The data catalog
+## Solution
 
-First of all, we need to create a data catalog in a `config.py` in your project.
+The main idea for the solution is quickly explained. We will, first, formalize
+dimensions into objects and, secondly, combine them in one object such that we only have
+to iterate over instances of this object in a single loop.
 
-The data catalog plays a key role in managing lots of repetitions of tasks because it
+We will start by defining the dimensions using {class}`~typing.NamedTuple` or
+{func}`~dataclasses.dataclass`.
 
-## Scalability
+Then, we will define the object that holds both pieces of information together. For
+lack of a better name, we will call it an experiment.
 
-Let us dive right into the aforementioned example. We start with one dataset `data.csv`.
-Then, we will create four different specifications of the data and, finally, fit three
-different models to each specification.
-
-This is the structure of the project.
-
-```text
-my_project
-├───pyproject.toml
-│
-├───src
-│   └───my_project
-│       ├────config.py
-│       │
-│       ├───data
-│       │   └────data.csv
-│       │
-│       ├───data_preparation
-│       │   ├────__init__.py
-│       │   ├────config.py
-│       │   └────task_prepare_data.py
-│       │
-│       └───estimation
-│           ├────__init__.py
-│           ├────config.py
-│           └────task_estimate_models.py
-│
-├───.pytask
-│   └────...
-│
-└───bld
+```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py
+---
+caption: config.py
+---
 ```
 
-The folder structure, the main `config.py` which holds `SRC` and `BLD`, and the tasks
-follow the same structure advocated throughout the tutorials.
+A few remarks on these data structures:
 
-New are the local configuration files in each subfolder of `my_project`, which contain
-objects shared across tasks. For example, `config.py` holds the paths to the processed
-data and the names of the data sets.
-
-```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_1.py
-```
+- The names within each dimension need to be unique. This ensures that combining them
+  into the name of the experiment yields a unique and descriptive id.
+- Dimensions might need more attributes than just a name, like paths, or other arguments
+  for the task. Add them.
 
-The task file `task_prepare_data.py` uses these objects to build the repetitions.
+Next, we will use these newly defined data structures and see how our tasks change when
+we use them.
 
-```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_2.py
+```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
+---
+caption: task_example.py
+---
 ```
 
-All arguments for the loop and the {func}`@task <pytask.task>` decorator are built
-within a function to keep the logic in one place and the module's namespace clean.
+As you see, we replaced the nested loops with a single loop over the experiments.
 
-Ids are used to make the task {ref}`ids <ids>` more descriptive and to simplify their
-selection with {ref}`expressions <expressions>`. Here is an example of the task ids with
-an explicit id.
+## Using the `DataCatalog`
 
-```
-# With id
-.../my_project/data_preparation/task_prepare_data.py::task_prepare_data[data_0]
-```
+## Adding another dimension
 
-Next, we move to the estimation to see how we can build another repetition on top.
+## Adding another level
 
-```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_3.py
-```
-
-In the local configuration, we define `ESTIMATIONS` which combines the information on
-data and model. The dictionary's key can be used as a task id whenever the estimation is
-involved. It allows triggering all tasks related to one estimation - estimation,
-figures, tables - with one command.
-
-```console
-pytask -k linear_probability_data_0
-```
-
-And here is the task file.
-
-```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_4.py
-```
+## Executing a subset
 
-Replicating this pattern across a project allows a clean way to define repetitions.
+## Grouping and aggregating
 
 ## Extending repetitions
 
diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py
index d0893d7a1..3e3bf14ef 100644
--- a/docs_src/how_to_guides/bp_complex_task_repetitions/example.py
+++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py
@@ -11,7 +11,7 @@
 for data_name in ("a", "b", "c"):
     for model_name in ("ols", "logit", "linear_prob"):
 
-        @task
+        @task(id=f"{model_name}-{data_name}")
         def task_fit_model(
             path_to_data: Path = SRC / f"{data_name}.pkl",
             path_to_model: Annotated[Path, Product] = BLD
diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
new file mode 100644
index 000000000..741d2c19c
--- /dev/null
+++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
@@ -0,0 +1,14 @@
+from pathlib import Path
+from typing import Annotated
+
+from myproject.config import EXPERIMENTS
+from pytask import Product
+from pytask import task
+
+for experiment in EXPERIMENTS:  # A single flat loop replaces the nested loops.
+
+    @task(id=experiment.name)
+    def task_fit_model(
+        path_to_data: Path = experiment.dataset.path,
+        path_to_model: Annotated[Path, Product] = experiment.path,
+    ) -> None: ...
diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py b/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py
new file mode 100644
index 000000000..002c669e5
--- /dev/null
+++ b/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from typing import NamedTuple
+
+SRC = Path(__file__).parent  # Directory that contains this module.
+BLD = SRC / "bld"  # Folder "bld" inside SRC.
+
+
+class Dataset(NamedTuple):  # Dimension: a dataset identified by its name.
+    name: str
+
+    @property
+    def path(self) -> Path:  # SRC / "<name>.pkl"
+        return SRC / f"{self.name}.pkl"
+
+
+class Model(NamedTuple):  # Dimension: a model identified by its name.
+    name: str
+
+
+DATASETS = [Dataset("a"), Dataset("b"), Dataset("c")]  # Dataset dimension.
+MODELS = [Model("ols"), Model("logit"), Model("linear_prob")]  # Model dimension.
+
+
+class Experiment(NamedTuple):  # One combination of a dataset and a model.
+    dataset: Dataset
+    model: Model
+
+    @property
+    def name(self) -> str:  # "<model name>-<dataset name>"
+        return f"{self.model.name}-{self.dataset.name}"
+
+    @property
+    def path(self) -> Path:  # BLD / "<experiment name>.pkl"
+        return BLD / f"{self.name}.pkl"
+
+
+EXPERIMENTS = [Experiment(dataset, model) for dataset in DATASETS for model in MODELS]
diff --git a/docs_src/how_to_guides/bp_scaling_tasks_1.py b/docs_src/how_to_guides/bp_scaling_tasks_1.py
deleted file mode 100644
index 52d6ea61a..000000000
--- a/docs_src/how_to_guides/bp_scaling_tasks_1.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Content of config.py
-from pathlib import Path
-
-from my_project.config import BLD
-from my_project.config import SRC
-
-DATA = {
-    "data_0": {"subset": "subset_1"},
-    "data_1": {"subset": "subset_2"},
-    "data_2": {"subset": "subset_3"},
-    "data_3": {"subset": "subset_4"},
-}
-
-
-def path_to_input_data(name: str) -> Path:
-    return SRC / "data" / "data.csv"
-
-
-def path_to_processed_data(name: str) -> Path:
-    return BLD / "data" / f"processed_{name}.pkl"
diff --git a/docs_src/how_to_guides/bp_scaling_tasks_2.py b/docs_src/how_to_guides/bp_scaling_tasks_2.py
deleted file mode 100644
index f31cfc644..000000000
--- a/docs_src/how_to_guides/bp_scaling_tasks_2.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Content of task_prepare_data.py
-from pathlib import Path
-
-from my_project.data_preparation.config import DATA
-from my_project.data_preparation.config import path_to_input_data
-from my_project.data_preparation.config import path_to_processed_data
-from pandas import pd
-from pytask import Product
-from pytask import task
-from typing_extensions import Annotated
-
-
-def _create_parametrization(data: list[str]) -> dict[str, Path]:
-    id_to_kwargs = {}
-    for data_name, kwargs in data.items():
-        id_to_kwargs[data_name] = {
-            "path_to_input_data": path_to_input_data(data_name),
-            "path_to_processed_data": path_to_processed_data(data_name),
-            **kwargs,
-        }
-
-    return id_to_kwargs
-
-
-_ID_TO_KWARGS = _create_parametrization(DATA)
-
-
-for id_, kwargs in _ID_TO_KWARGS.items():
-
-    @task(id=id_, kwargs=kwargs)
-    def task_prepare_data(
-        path_to_input_data: Path,
-        subset: str,
-        path_to_processed_data: Annotated[Path, Product],
-    ) -> None:
-        df = pd.read_csv(path_to_input_data)
-        # ... transform the data.
-        subset = df.loc[df["subset"].eq(subset)]
-        subset.to_pickle(path_to_processed_data)
diff --git a/docs_src/how_to_guides/bp_scaling_tasks_3.py b/docs_src/how_to_guides/bp_scaling_tasks_3.py
deleted file mode 100644
index 1e2103d45..000000000
--- a/docs_src/how_to_guides/bp_scaling_tasks_3.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Content of config.py
-from pathlib import Path
-
-from my_project.config import BLD
-from my_project.data_preparation.config import DATA
-
-_MODELS = ["linear_probability", "logistic_model", "decision_tree"]
-
-
-ESTIMATIONS = {
-    f"{data_name}_{model_name}": {"model": model_name, "data": data_name}
-    for model_name in _MODELS
-    for data_name in DATA
-}
-
-
-def path_to_estimation_result(name: str) -> Path:
-    return BLD / "estimation" / f"estimation_{name}.pkl"
diff --git a/docs_src/how_to_guides/bp_scaling_tasks_4.py b/docs_src/how_to_guides/bp_scaling_tasks_4.py
deleted file mode 100644
index a6c665394..000000000
--- a/docs_src/how_to_guides/bp_scaling_tasks_4.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Content of task_estimate_models.py
-from pathlib import Path
-
-from my_project.data_preparation.config import path_to_processed_data
-from my_project.estimations.config import ESTIMATIONS
-from my_project.estimations.config import path_to_estimation_result
-from pytask import Product
-from pytask import task
-from typing_extensions import Annotated
-
-
-def _create_parametrization(
-    estimations: dict[str, dict[str, str]],
-) -> dict[str, str | Path]:
-    id_to_kwargs = {}
-    for name, config in estimations.items():
-        id_to_kwargs[name] = {
-            "path_to_data": path_to_processed_data(config["data"]),
-            "model": config["model"],
-            "path_to_estimation": path_to_estimation_result(name),
-        }
-
-    return id_to_kwargs
-
-
-_ID_TO_KWARGS = _create_parametrization(ESTIMATIONS)
-
-
-for id_, kwargs in _ID_TO_KWARGS.items():
-
-    @task(id=id_, kwargs=kwargs)
-    def task_estmate_models(
-        path_to_data: Path, model: str, path_to_estimation: Annotated[Path, Product]
-    ) -> None:
-        if model == "linear_probability":
-            ...