From 3bdc31713784703faec6e4f839af7a31849236fb Mon Sep 17 00:00:00 2001 From: GioeleB00 Date: Sun, 6 Jul 2025 16:15:42 +0200 Subject: [PATCH 1/5] moving file to correct folder --- .../{integration_tests => unit_tests}/test_sampler_helper.md | 0 .../{integration_tests => unit_tests}/test_simulation_input.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename documentation/tests_documentation/{integration_tests => unit_tests}/test_sampler_helper.md (100%) rename documentation/tests_documentation/{integration_tests => unit_tests}/test_simulation_input.md (100%) diff --git a/documentation/tests_documentation/integration_tests/test_sampler_helper.md b/documentation/tests_documentation/unit_tests/test_sampler_helper.md similarity index 100% rename from documentation/tests_documentation/integration_tests/test_sampler_helper.md rename to documentation/tests_documentation/unit_tests/test_sampler_helper.md diff --git a/documentation/tests_documentation/integration_tests/test_simulation_input.md b/documentation/tests_documentation/unit_tests/test_simulation_input.md similarity index 100% rename from documentation/tests_documentation/integration_tests/test_simulation_input.md rename to documentation/tests_documentation/unit_tests/test_simulation_input.md From 62c6287b52836f1bdd7152bd18f654b95a1a06ab Mon Sep 17 00:00:00 2001 From: GioeleB00 Date: Sun, 6 Jul 2025 19:21:16 +0200 Subject: [PATCH 2/5] Documentation added --- documentation/FASTSIM_VISION.md | 41 +++ .../requests_generator.md | 280 ++++++++++++++++++ .../core/event_samplers/poisson_poisson.py | 8 +- 3 files changed, 325 insertions(+), 4 deletions(-) create mode 100644 documentation/FASTSIM_VISION.md create mode 100644 documentation/backend_documentation/requests_generator.md diff --git a/documentation/FASTSIM_VISION.md b/documentation/FASTSIM_VISION.md new file mode 100644 index 0000000..7119807 --- /dev/null +++ b/documentation/FASTSIM_VISION.md @@ -0,0 +1,41 @@ +## 1 Why FastSim? 
+ +FastAPI + Uvicorn gives Python teams a lightning-fast async stack, yet sizing it for production still means guess-work, costly cloud load-tests or late surprises. **FastSim** fills that gap by becoming a **digital twin** of your actual service: + +* It **replays** your FastAPI + Uvicorn event-loop behavior in SimPy, generating exactly the same kinds of asynchronous steps (parsing, CPU work, I/O, LLM calls) that happen in real code. +* It **models** your infrastructure primitives—CPU cores (via a SimPy `Resource`), database pools, rate-limiters, even GPU inference quotas—so you can see queue lengths, scheduling delays, resource utilization, and end-to-end latency. +* It **outputs** the very metrics you’d scrape in production (p50/p95/p99 latency, ready-queue lag, current & max concurrency, throughput, cost per LLM call), but entirely offline, in seconds. + +With FastSim you can ask *“What happens if traffic doubles on Black Friday?”*, *“How many cores to keep p95 < 100 ms?”* or *“Is our LLM-driven endpoint ready for prime time?”*—and get quantitative answers **before** you deploy. + +**Outcome:** data-driven capacity planning, early performance tuning, and far fewer “surprises” once you hit production. + +--- + +## 2 Project Goals + +| # | Goal | Practical Outcome | +| - | ------------------------- | ------------------------------------------------------------------------ | +| 1 | **Pre-production sizing** | Know core-count, pool-size, replica-count to hit SLA. | +| 2 | **Scenario lab** | Explore traffic models, endpoint mixes, latency distributions, RTT, etc. | +| 3 | **Twin metrics** | Produce the same metrics you’ll scrape in prod (latency, queue, CPU). | +| 4 | **Rapid iteration** | One YAML/JSON config or REST call → full report. | +| 5 | **Educational value** | Visualise how GIL lag, queue length, concurrency react to load. 
| + +--- + +## 3 Who benefits & why (detailed) + +| Audience | Pain-point solved | FastSim value | +| ------------------------------ | --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Backend engineers** | Unsure if 4 vCPU container survives a marketing spike | Run *what-if* load, tweak CPU cores / pool size, get p95 & max-concurrency before merging. | +| **DevOps / SRE** | Guesswork in capacity planning; cost of over-provisioning | Simulate 1 → N replicas, autoscaler thresholds, DB-pool size; pick the cheapest config meeting SLA. | +| **ML / LLM product teams** | LLM inference cost & latency hard to predict | Model the LLM step with a price + latency distribution; estimate \$/req and GPU batch gains without real GPU. | +| **Educators / Trainers** | Students struggle to “see” event-loop internals | Visualise GIL ready-queue lag, CPU vs I/O steps, effect of blocking code—perfect for live demos and labs. | +| **Consultants / Architects** | Need a quick PoC of new designs for clients | Drop endpoint definitions in YAML and demo throughput / latency under projected load in minutes. | +| **Open-source community** | Lacks a lightweight Python simulator for ASGI workloads | Extensible codebase; easy to plug in new resources (rate-limit, cache) or traffic models (spike, uniform ramp). | +| **System-design interviewees** | Hard to quantify trade-offs in whiteboard interviews | Prototype real-time metrics—queue lengths, concurrency, latency distributions—to demonstrate in interviews how your design scales and where bottlenecks lie. | + +--- + +**Bottom-line:** FastSim turns abstract architecture diagrams into concrete numbers—*before* spinning up expensive cloud environments—so you can build, validate and discuss your designs with full confidence. 
diff --git a/documentation/backend_documentation/requests_generator.md b/documentation/backend_documentation/requests_generator.md new file mode 100644 index 0000000..64b0f66 --- /dev/null +++ b/documentation/backend_documentation/requests_generator.md @@ -0,0 +1,280 @@ +# Requests Generator + +This document describes the design of the **requests generator**, which models a stream of user requests to a given endpoint over time. + +--- + +## Model Inputs and Output + +Following the FastSim philosophy, we accept a small set of input parameters to drive a “what-if” analysis in a pre-production environment. These inputs let you explore reliability and cost implications under different traffic scenarios. + +**Inputs** + +1. **Average concurrent users** – expected number of users (or sessions) simultaneously hitting the endpoint. +2. **Average requests per minute per user** – average number of requests each user issues per minute. +3. **Simulation time** – total duration of the simulation, in seconds. + +**Output** +A continuous sequence of timestamps (seconds) marking individual request arrivals. + +--- + +## Model Assumptions + +* *Concurrent users* and *requests per minute per user* are **random variables**. +* *Simulation time* is **deterministic**. + +We model: + +* **Requests per minute per user** as Poisson($\lambda_r$). +* **Concurrent users** as either Poisson($\lambda_u$) or truncated Normal. 
+ +```python +from pydantic import BaseModel +from typing import Literal + +class RVConfig(BaseModel): + """Configure a random-variable parameter.""" + mean: float + distribution: Literal["poisson", "normal", "gaussian"] = "poisson" + variance: float | None = None # required only for normal/gaussian + +class SimulationInput(BaseModel): + """Define simulation inputs.""" + avg_active_users: RVConfig + avg_request_per_minute_per_user: RVConfig + total_simulation_time: int | None = None +``` + +--- + +## Aggregate Request Rate + +From the two random inputs we define the **per-second aggregate rate** $\Lambda$: + +$$ +\Lambda + = \text{concurrent\_users} + \;\times\; + \frac{\text{requests\_per\_minute\_per\_user}}{60} + \quad[\text{requests/s}]. +$$ + +--- + +## 1. Poisson → Exponential Refresher + +### 1.1 Homogeneous Poisson process + +A Poisson process of rate $\lambda$ has + +$$ +\Pr\{N(t)=k\} + = e^{-\lambda t}\,\frac{(\lambda t)^{k}}{k!},\quad k=0,1,2,\dots +$$ + +### 1.2 Waiting time to first event + +Define $T_1=\inf\{t>0:N(t)=1\}$. +The survival function is + +$$ +\Pr\{T_1>t\} + = \Pr\{N(t)=0\} + = e^{-\lambda t}, +$$ + +so the CDF is + +$$ +F_{T_1}(t) = 1 - e^{-\lambda t},\quad t\ge0, +$$ + +and the density $f(t)=\lambda\,e^{-\lambda t}$. Thus + +$$ +T_1 \sim \mathrm{Exp}(\lambda), +$$ + +and by memorylessness every inter-arrival gap $\Delta t_i$ is i.i.d. Exp($\lambda$). + +### 1.3 Inverse-CDF sampling + +To draw $\Delta t\sim\mathrm{Exp}(\lambda)$: + +1. Sample $U\sim\mathcal U(0,1)$. +2. Solve $U=1-e^{-\lambda\,\Delta t}$;$\Rightarrow\;\Delta t=-\ln(1-U)/\lambda$. +3. Equivalent compact form: + $\displaystyle \Delta t = -\,\ln(U)/\lambda$. + +--- + +## 2. 
Poisson × Poisson Workload + +### 2.1 Notation + +| Symbol | Meaning | Law | +| --------------------------------- | --------------------------------------- | -------- | +| $U\sim\mathrm{Pois}(\lambda_u)$ | active users in current 1-minute window | Poisson | +| $R_i\sim\mathrm{Pois}(\lambda_r)$ | requests per minute by user *i* | Poisson | +| $N=\sum_{i=1}^U R_i$ | total requests in that minute | compound | +| $\Lambda=N/60$ | aggregate rate (requests / second) | compound | + +All $R_i$ are independent of each other and of $U$. + +### 2.2 Conditional sum ⇒ Poisson + +Given $U=u$: + +$$ +N\mid U=u +=\sum_{i=1}^{u}R_i +\;\sim\;\mathrm{Pois}(u\,\lambda_r). +$$ + +### 2.3 Unconditional law of $N$ + +By the law of total probability: + +$$ +\Pr\{N=n\} +=\sum_{u=0}^{\infty} +\Pr\{U=u\}\; +\Pr\{N=n\mid U=u\} +\;=\; +e^{-\lambda_u}\,\frac1{n!} +\sum_{u=0}^{\infty} +\frac{\lambda_u^u}{u!}\, +e^{-u\lambda_r}\,(u\lambda_r)^n. +$$ + +This is the **Poisson–Poisson compound** (Neyman Type A) distribution. + +--- + +## 3. 
Exact Hierarchical Sampler + +Rather than invert the discrete CDF above, we exploit the conditional structure: + +```python +# Hierarchical sampler code snippet +now = 0.0 # virtual clock (s) +window_end = 0.0 # end of the current user window +Lambda = 0.0 # aggregate rate Λ (req/s) + +while now < simulation_time: + # (Re)sample U at the start of each window + if now >= window_end: + window_end = now + float(sampling_window_s) + users = poisson_variable_generator(mean_concurrent_user, rng) + Lambda = users * mean_req_per_sec_per_user + + # No users → fast-forward to next window + if Lambda <= 0.0: + now = window_end + continue + + # Exponential gap from a protected uniform value + u_raw = max(uniform_variable_generator(rng), 1e-15) + delta_t = -math.log(1.0 - u_raw) / Lambda + + # End simulation if the next event exceeds the horizon + if now + delta_t > simulation_time: + break + + # If the gap crosses the window boundary, jump to it + if now + delta_t >= window_end: + now = window_end + continue + + now += delta_t + yield delta_t +``` + +Because each conditional step matches the exact Poisson→Exponential law, this two-stage algorithm reproduces the same joint distribution as analytically inverting the compound CDF, but with minimal computation. + +--- + +## 4. Validity of the hierarchical sampler + +The validity of the hierarchical sampler relies on a structural property of the model: + +$$ +N \;=\; \sum_{i=1}^{U} R_i, +$$ + +where each $R_i \sim \mathrm{Pois}(\lambda_r)$ is independent of the others and of $U$. Because the Poisson family is closed under convolution, + +$$ +N \,\big|\, U=u \;\sim\; \mathrm{Pois}\!\bigl(u\,\lambda_r\bigr). +$$ + +This result has two important consequences: + +1. **Deterministic conditional rate** – Given $U=u$, the aggregate request arrivals constitute a homogeneous Poisson process with the *deterministic* rate + + $$ + \Lambda = \frac{u\,\lambda_r}{60}. + $$ + + All inter-arrival gaps are therefore i.i.d. 
exponential with parameter $\Lambda$, allowing us to use the standard inverse–CDF formula for each gap. + +2. **Layered uncertainty handling** – The randomness associated with $U$ is handled in an outer step (sampling $U$ once per window), while the inner step leverages the well-known Poisson→Exponential correspondence. This two-level construction reproduces exactly the joint distribution obtained by first drawing $\Lambda = N/60$ from the compound Poisson law and then drawing gaps conditional on $\Lambda$. + +If the total count could **not** be written as a sum of independent Poisson variables, the conditional distribution of $N$ would no longer be Poisson and the exponential-gap shortcut would not apply. In that situation one would need to work directly with the (generally more complex) mixed distribution of $\Lambda$ or adopt another specialized sampling scheme. + + + +## 5. Equivalence to CDF Inversion + +By the law of total probability, for any event set $A$: + +$$ +\Pr\{(\Lambda,\Delta t_1,\dots)\in A\} +=\sum_{u=0}^\infty +\Pr\{U=u\}\; +\Pr\{(\Lambda,\Delta t_1,\dots)\in A\mid U=u\}. +$$ + +Step 1 samples $\Pr\{U=u\}$, step 2–3 sample the conditional exponential gaps. Because these two factors exactly match the mixture definition of the compound CDF, the hierarchical sampler **is** an exact implementation of two-stage CDF inversion, avoiding any explicit inversion of an infinite series. + +--- + +## 6. Gaussian × Poisson Variant + +If concurrent users follow a truncated Normal, + +$$ +U\sim \max\{0,\;\mathcal N(\mu_u,\sigma_u^2)\}, +$$ + +steps 2–3 remain unchanged; only step 1 draws $U$ from a continuous law. The resulting mixture is continuous, yet the hierarchical sampler remains exact. + +--- + +## 7. Time Window + +The sampling window length governs how often we re-sample $U$. It should reflect the timescale over which user count fluctuations become significant. 
Our default is **60 s**, but you can adjust this parameter in your configuration before each simulation. + +--- + +## Limitations of the Requests Model + +1. **Independence assumption** + Assumes per-user streams and $U$ are independent. Real traffic often exhibits user-behavior correlations (e.g., flash crowds). + +2. **Exponential inter-arrival times** + Implies memorylessness; cannot capture self-throttling or long-range dependence found in real workloads. + +3. **No diurnal/trend component** + User count $U$ is IID per window. To model seasonality or trends, you must vary $\lambda_u(t)$ externally. + +4. **No burst-control or rate-limiting** + Does not simulate client-side throttling or server back-pressure. Any rate-limit logic must be added externally. + +5. **Gaussian truncation artifacts** + In the Gaussian–Poisson variant, truncating negatives to zero and rounding can under-estimate extreme user counts. + + +**Key takeaway:** By structuring the generator as +$\Lambda = U\,\lambda_r/60$ with a two-stage Poisson→Exponential sampler, FastSim efficiently reproduces compound Poisson traffic dynamics without any complex CDF inversion. 
diff --git a/src/app/core/event_samplers/poisson_poisson.py b/src/app/core/event_samplers/poisson_poisson.py index 37c2da5..1a9e210 100644 --- a/src/app/core/event_samplers/poisson_poisson.py +++ b/src/app/core/event_samplers/poisson_poisson.py @@ -55,23 +55,23 @@ def poisson_poisson_sampling( now = 0.0 # virtual clock (s) window_end = 0.0 # end of the current user window - lam = 0.0 # aggregate rate Λ (req/s) + Lambda = 0.0 # aggregate rate Λ (req/s) while now < simulation_time: # (Re)sample U at the start of each window if now >= window_end: window_end = now + float(sampling_window_s) users = poisson_variable_generator(mean_concurrent_user, rng) - lam = users * mean_req_per_sec_per_user + Lambda = users * mean_req_per_sec_per_user # No users → fast-forward to next window - if lam <= 0.0: + if Lambda <= 0.0: now = window_end continue # Exponential gap from a protected uniform value u_raw = max(uniform_variable_generator(rng), 1e-15) - delta_t = -math.log(1.0 - u_raw) / lam + delta_t = -math.log(1.0 - u_raw) / Lambda # End simulation if the next event exceeds the horizon if now + delta_t > simulation_time: From 5642c897c27a4f75a41f58a47dc32d284dada178 Mon Sep 17 00:00:00 2001 From: Gioele Botta <157820569+GioeleB00@users.noreply.github.com> Date: Mon, 7 Jul 2025 12:31:54 +0200 Subject: [PATCH 3/5] Update requests_generator.md --- documentation/backend_documentation/requests_generator.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/documentation/backend_documentation/requests_generator.md b/documentation/backend_documentation/requests_generator.md index 64b0f66..556c4f4 100644 --- a/documentation/backend_documentation/requests_generator.md +++ b/documentation/backend_documentation/requests_generator.md @@ -28,6 +28,7 @@ We model: * **Requests per minute per user** as Poisson($\lambda_r$). * **Concurrent users** as either Poisson($\lambda_u$) or truncated Normal. 
+* **The variables are independent** ```python from pydantic import BaseModel @@ -120,7 +121,7 @@ To draw $\Delta t\sim\mathrm{Exp}(\lambda)$: | $N=\sum_{i=1}^U R_i$ | total requests in that minute | compound | | $\Lambda=N/60$ | aggregate rate (requests / second) | compound | -All $R_i$ are independent of each other and of $U$. +The procedure here relies heavily on the independence of our random variables: all $R_i$ are independent of each other and of $U$. ### 2.2 Conditional sum ⇒ Poisson From 6d764dd4253abab7b23c84c30a959bc61af6373c Mon Sep 17 00:00:00 2001 From: GioeleB00 Date: Thu, 10 Jul 2025 17:22:19 +0200 Subject: [PATCH 4/5] test Added, introduced constants for the sampling window --- src/app/config/constants.py | 9 +- .../core/event_samplers/gaussian_poisson.py | 5 +- .../core/event_samplers/poisson_poisson.py | 13 +- src/app/core/simulation/simulation_run.py | 10 +- src/app/schemas/simulation_input.py | 39 ++-- tests/unit/sampler/test_gaussian_poisson.py | 93 ++++++++ tests/unit/sampler/test_poisson_posson.py | 119 ++++++++++ .../simulation/test_events_generator_input.py | 210 ++++++++++++++++++ .../simulation/test_requests_generator.py | 5 +- .../unit/simulation/test_simulation_input.py | 93 -------- 10 files changed, 466 insertions(+), 130 deletions(-) create mode 100644 tests/unit/sampler/test_gaussian_poisson.py create mode 100644 tests/unit/sampler/test_poisson_posson.py create mode 100644 tests/unit/simulation/test_events_generator_input.py delete mode 100644 tests/unit/simulation/test_simulation_input.py diff --git a/src/app/config/constants.py b/src/app/config/constants.py index fb0c231..5146c62 100644 --- a/src/app/config/constants.py +++ b/src/app/config/constants.py @@ -6,6 +6,9 @@ class TimeDefaults(IntEnum): """Default time-related constants (all in seconds).""" - MIN_TO_SEC = 60 # 1 minute → 60 s - SAMPLING_WINDOW = 60 # keep U(t) constant for 60 s - SIMULATION_HORIZON = 3_600 # run 1 h if user gives no other value + MIN_TO_SEC = 60 # 1 minute → 60 s + USER_SAMPLING_WINDOW = 60 # keep U(t) 
constant for 60 s, default + SIMULATION_TIME = 3_600 # run 1 h if user gives no other value + MIN_SIMULATION_TIME = 1800 # min simulation time + MIN_USER_SAMPLING_WINDOW = 1 # 1 second + MAX_USER_SAMPLING_WINDOW = 120 # 2 minutes diff --git a/src/app/core/event_samplers/gaussian_poisson.py b/src/app/core/event_samplers/gaussian_poisson.py index 2239626..03434b5 100644 --- a/src/app/core/event_samplers/gaussian_poisson.py +++ b/src/app/core/event_samplers/gaussian_poisson.py @@ -22,7 +22,6 @@ def gaussian_poisson_sampling( input_data: SimulationInput, *, - sampling_window_s: int = TimeDefaults.SAMPLING_WINDOW.value, rng: np.random.Generator | None = None, ) -> Generator[float, None, None]: """ @@ -41,10 +40,12 @@ def gaussian_poisson_sampling( rng = rng or np.random.default_rng() simulation_time = input_data.total_simulation_time + user_sampling_window = input_data.user_sampling_window # pydantic in the validation assign a value and mypy is not # complaining because a None cannot be compared in the loop # to a float assert simulation_time is not None + assert user_sampling_window is not None # λ_u : mean concurrent users per window mean_concurrent_user = float(input_data.avg_active_users.mean) @@ -68,7 +69,7 @@ def gaussian_poisson_sampling( while now < simulation_time: # (Re)sample U at the start of each window if now >= window_end: - window_end = now + float(sampling_window_s) + window_end = now + float(user_sampling_window) users = truncated_gaussian_generator( mean_concurrent_user, variance_concurrent_user, diff --git a/src/app/core/event_samplers/poisson_poisson.py b/src/app/core/event_samplers/poisson_poisson.py index 1a9e210..5f0598c 100644 --- a/src/app/core/event_samplers/poisson_poisson.py +++ b/src/app/core/event_samplers/poisson_poisson.py @@ -19,7 +19,6 @@ def poisson_poisson_sampling( input_data: SimulationInput, *, - sampling_window_s: int = TimeDefaults.SAMPLING_WINDOW.value, rng: np.random.Generator | None = None, ) -> Generator[float, None, None]: 
""" @@ -38,10 +37,12 @@ def poisson_poisson_sampling( rng = rng or np.random.default_rng() simulation_time = input_data.total_simulation_time + user_sampling_window = input_data.user_sampling_window # pydantic in the validation assign a value and mypy is not # complaining because a None cannot be compared in the loop # to a float assert simulation_time is not None + assert user_sampling_window is not None # λ_u : mean concurrent users per window mean_concurrent_user = float(input_data.avg_active_users.mean) @@ -55,23 +56,23 @@ def poisson_poisson_sampling( now = 0.0 # virtual clock (s) window_end = 0.0 # end of the current user window - Lambda = 0.0 # aggregate rate Λ (req/s) + lam = 0.0 # aggregate rate Λ (req/s) while now < simulation_time: # (Re)sample U at the start of each window if now >= window_end: - window_end = now + float(sampling_window_s) + window_end = now + float(user_sampling_window) users = poisson_variable_generator(mean_concurrent_user, rng) - Lambda = users * mean_req_per_sec_per_user + lam = users * mean_req_per_sec_per_user # No users → fast-forward to next window - if Lambda <= 0.0: + if lam <= 0.0: now = window_end continue # Exponential gap from a protected uniform value u_raw = max(uniform_variable_generator(rng), 1e-15) - delta_t = -math.log(1.0 - u_raw) / Lambda + delta_t = -math.log(1.0 - u_raw) / lam # End simulation if the next event exceeds the horizon if now + delta_t > simulation_time: diff --git a/src/app/core/simulation/simulation_run.py b/src/app/core/simulation/simulation_run.py index b2f72f3..d3f52f6 100644 --- a/src/app/core/simulation/simulation_run.py +++ b/src/app/core/simulation/simulation_run.py @@ -20,15 +20,15 @@ def run_simulation( - data: SimulationInput, + input_data: SimulationInput, *, rng: np.random.Generator, ) -> SimulationOutput: """Simulation executor in Simpy""" - gaps: Generator[float, None, None] = requests_generator(data, rng=rng) + gaps: Generator[float, None, None] = requests_generator(input_data, 
rng=rng) env = simpy.Environment() - simulation_time = data.total_simulation_time + simulation_time = input_data.total_simulation_time # pydantic in the validation assign a value and mypy is not # complaining because a None cannot be compared in the loop # to a float @@ -51,6 +51,6 @@ def arrival_process( return SimulationOutput( total_requests=total_request_per_time_period, - metric_2=str(data.avg_request_per_minute_per_user.mean), - metric_n=str(data.avg_active_users.mean), + metric_2=str(input_data.avg_request_per_minute_per_user.mean), + metric_n=str(input_data.avg_active_users.mean), ) diff --git a/src/app/schemas/simulation_input.py b/src/app/schemas/simulation_input.py index 2197be1..ed702bb 100644 --- a/src/app/schemas/simulation_input.py +++ b/src/app/schemas/simulation_input.py @@ -2,7 +2,7 @@ from typing import Literal -from pydantic import BaseModel, field_validator, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from app.config.constants import TimeDefaults @@ -37,22 +37,23 @@ class SimulationInput(BaseModel): avg_active_users: RVConfig avg_request_per_minute_per_user: RVConfig - total_simulation_time: int | None = None - - @field_validator("total_simulation_time", mode="before") - def check_simulation_time(cls, v: object) -> int: # noqa: N805 - """ - Assign constant value to total sim time if is None - check if it is of the right type - impose a lower boundary for the simulation - """ - if v is None: - v = TimeDefaults.SIMULATION_HORIZON.value - if not isinstance(v, int): - err_msg_type = "the simulation time must be an integer" - raise ValueError(err_msg_type) # noqa: TRY004 - if v <= 60: - err_msg_val = "the simulation must be at least 60 seconds" - raise ValueError(err_msg_val) - return v + total_simulation_time: int = Field( + default=TimeDefaults.SIMULATION_TIME.value, + ge=TimeDefaults.MIN_SIMULATION_TIME.value, # minimum simulation time in seconds + description=( + f"Simulation time in seconds (>= 
{TimeDefaults.MIN_SIMULATION_TIME.value})." + ), + ) + + user_sampling_window: int = Field( + default=TimeDefaults.USER_SAMPLING_WINDOW.value, + ge=TimeDefaults.MIN_USER_SAMPLING_WINDOW.value, + le=TimeDefaults.MAX_USER_SAMPLING_WINDOW.value, + description=( + "Sampling window in seconds " + f"({TimeDefaults.MIN_USER_SAMPLING_WINDOW.value}-" + f"{TimeDefaults.MAX_USER_SAMPLING_WINDOW.value})." + ), + ) + diff --git a/tests/unit/sampler/test_gaussian_poisson.py b/tests/unit/sampler/test_gaussian_poisson.py new file mode 100644 index 0000000..6d49902 --- /dev/null +++ b/tests/unit/sampler/test_gaussian_poisson.py @@ -0,0 +1,93 @@ +"""Unit tests for gaussian_poisson_sampling.""" + +from __future__ import annotations + +import itertools +from types import GeneratorType + +import numpy as np +import pytest + +from app.config.constants import TimeDefaults +from app.core.event_samplers.gaussian_poisson import gaussian_poisson_sampling +from app.schemas.simulation_input import RVConfig, SimulationInput + +# --------------------------------------------------------------------------- +# Fixture +# --------------------------------------------------------------------------- + + +@pytest.fixture +def base_input() -> SimulationInput: + """Return a minimal, valid SimulationInput for the Gaussian-Poisson sampler.""" + return SimulationInput( + avg_active_users=RVConfig( + mean=10.0, variance=4.0, distribution="gaussian", + ), + avg_request_per_minute_per_user=RVConfig(mean=30.0), + total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME.value, + user_sampling_window=TimeDefaults.USER_SAMPLING_WINDOW.value, + ) + + +# --------------------------------------------------------------------------- +# Basic behaviour +# --------------------------------------------------------------------------- + + +def test_returns_generator_type(base_input: SimulationInput) -> None: + """The function must return a generator object.""" + rng = np.random.default_rng(0) + gen = 
gaussian_poisson_sampling(base_input, rng=rng) + assert isinstance(gen, GeneratorType) + + +def test_generates_positive_gaps(base_input: SimulationInput) -> None: + """ + With nominal parameters the sampler should emit at least a few positive + gaps and no gap must be non-positive. + """ + rng = np.random.default_rng(42) + gaps: list[float] = list( + itertools.islice(gaussian_poisson_sampling(base_input, rng=rng), 1000), + ) + + # At least one event is expected. + assert gaps + # No gap may be negative or zero. + assert all(gap > 0.0 for gap in gaps) + # The cumulative time of gaps must stay below the horizon. + assert sum(gaps) < base_input.total_simulation_time + + +# --------------------------------------------------------------------------- +# Edge-case: zero users ⇒ no events +# --------------------------------------------------------------------------- + + +def test_zero_users_produces_no_events( + monkeypatch: pytest.MonkeyPatch, + base_input: SimulationInput, +) -> None: + """ + If every Gaussian draw returns 0 users, Λ == 0, + hence the generator must yield no events at all. + """ + + def fake_truncated_gaussian( + mean: float, + var: float, + rng: np.random.Generator, + ) -> float: + return 0.0 # force U = 0 + + # Patch the helper so that it always returns 0 users. 
+ monkeypatch.setattr( + "app.core.event_samplers.gaussian_poisson.truncated_gaussian_generator", + fake_truncated_gaussian, + ) + + rng = np.random.default_rng(123) + gaps = list(gaussian_poisson_sampling(base_input, rng=rng)) + + assert gaps == [] # no events should be generated diff --git a/tests/unit/sampler/test_poisson_posson.py b/tests/unit/sampler/test_poisson_posson.py new file mode 100644 index 0000000..0ea539e --- /dev/null +++ b/tests/unit/sampler/test_poisson_posson.py @@ -0,0 +1,119 @@ +"""Unit tests for the poisson_poisson_sampling generator.""" + +from __future__ import annotations + +import itertools +import math +from types import GeneratorType + +import numpy as np +import pytest + +from app.config.constants import TimeDefaults +from app.core.event_samplers.poisson_poisson import poisson_poisson_sampling +from app.schemas.simulation_input import RVConfig, SimulationInput + + +@pytest.fixture +def base_input() -> SimulationInput: + """Return a minimal-valid SimulationInput for the sampler tests.""" + return SimulationInput( + # 1 average concurrent user … + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + # … sending on average 60 req/min → 1 req/s + avg_request_per_minute_per_user={"mean": 60.0, "distribution": "poisson"}, + total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME.value, # 30 min + user_sampling_window=TimeDefaults.USER_SAMPLING_WINDOW.value, # 60 s + ) + + +# --------------------------------------------------------------------- +# BASIC SHAPE / TYPE TESTS +# --------------------------------------------------------------------- + + +def test_sampler_returns_generator(base_input: SimulationInput) -> None: + """The function must return a real generator object.""" + rng = np.random.default_rng(0) + gen = poisson_poisson_sampling(base_input, rng=rng) + + assert isinstance(gen, GeneratorType) + + +def test_all_gaps_are_positive(base_input: SimulationInput) -> None: + """Every yielded inter-arrival gap Δt must be > 0.""" + 
rng = np.random.default_rng(1) + gaps: list[float] = list( + itertools.islice(poisson_poisson_sampling(base_input, rng=rng), 1_000), + ) + + # None of the first 1 000 gaps (if any) can be negative or zero + assert all(gap > 0.0 for gap in gaps) + + +# --------------------------------------------------------------------- +# REPRODUCIBILITY WITH FIXED RNG SEED +# --------------------------------------------------------------------- + + +def test_sampler_is_reproducible_with_fixed_seed(base_input: SimulationInput) -> None: + """Same seed ⇒ identical first N gaps.""" + seed = 42 + n_samples = 15 + + gaps_1 = list( + itertools.islice( + poisson_poisson_sampling( + base_input, rng=np.random.default_rng(seed), + ), + n_samples, + ), + ) + gaps_2 = list( + itertools.islice( + poisson_poisson_sampling( + base_input, rng=np.random.default_rng(seed), + ), + n_samples, + ), + ) + + assert gaps_1 == gaps_2 + + +# --------------------------------------------------------------------- +# EDGE-CASE: ZERO USERS ⇒ NO EVENTS +# --------------------------------------------------------------------- + + +def test_zero_users_produces_no_events(base_input: SimulationInput) -> None: + """ + With mean concurrent users == 0 the Poisson draw is almost surely 0, + so Λ = 0 and the generator should yield no events. 
+ """ + input_data = SimulationInput( + avg_active_users=RVConfig(mean=0.0, distribution="poisson"), + avg_request_per_minute_per_user=RVConfig(mean=60.0, distribution="poisson"), + total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME.value, + user_sampling_window=TimeDefaults.USER_SAMPLING_WINDOW.value, + ) + + rng = np.random.default_rng(123) + gaps = list(poisson_poisson_sampling(input_data, rng=rng)) + + assert gaps == [] # no events expected + +# --------------------------------------------------------------------- +# CUMULATIVE TIME ALWAYS < SIMULATION HORIZON +# --------------------------------------------------------------------- + + +def test_cumulative_time_never_exceeds_horizon(base_input: SimulationInput) -> None: + """ΣΔt (virtual clock) must stay strictly below total_simulation_time.""" + rng = np.random.default_rng(7) + gaps = list(poisson_poisson_sampling(base_input, rng=rng)) + + cum_time = math.fsum(gaps) + # Even if the virtual clock can jump forward when λ == 0, + # the summed gaps must never exceed the horizon. 
+ assert cum_time < base_input.total_simulation_time diff --git a/tests/unit/simulation/test_events_generator_input.py b/tests/unit/simulation/test_events_generator_input.py new file mode 100644 index 0000000..39c5216 --- /dev/null +++ b/tests/unit/simulation/test_events_generator_input.py @@ -0,0 +1,210 @@ +import pytest +from pydantic import ValidationError + +from app.config.constants import TimeDefaults +from app.schemas.simulation_input import RVConfig, SimulationInput + +# -------------------------------------------------------------------------- +# TEST RANDOM VARIABLE CONFIGURATION +# -------------------------------------------------------------------------- + +def test_normal_sets_variance_to_mean() -> None: + """When distribution='normal' and variance is omitted, variance == mean.""" + cfg = RVConfig(mean=10, distribution="normal") + assert cfg.variance == 10.0 + + +def test_poisson_keeps_variance_none() -> None: + """When distribution='poisson' and variance is omitted, variance stays None.""" + cfg = RVConfig(mean=5, distribution="poisson") + assert cfg.variance is None + + +def test_explicit_variance_is_preserved() -> None: + """If the user supplies variance explicitly, it is preserved unchanged.""" + cfg = RVConfig(mean=8, distribution="normal", variance=4) + assert cfg.variance == 4.0 + + +def test_mean_must_be_numeric() -> None: + """A non-numeric mean raises a ValidationError with our custom message.""" + with pytest.raises(ValidationError) as excinfo: + RVConfig(mean="not a number", distribution="poisson") + + # Check that at least one error refers to the 'mean' field + assert any(err["loc"] == ("mean",) for err in excinfo.value.errors()) + assert "mean must be a number" in excinfo.value.errors()[0]["msg"] + + +def test_missing_mean_field() -> None: + """Omitting the mean field raises a 'field required' ValidationError.""" + with pytest.raises(ValidationError) as excinfo: + # Using model_validate avoids the constructor signature check + 
RVConfig.model_validate({"distribution": "normal"}) + + assert any( + err["loc"] == ("mean",) and err["type"] == "missing" + for err in excinfo.value.errors() + ) + +def test_gaussian_sets_variance_to_mean() -> None: + """When distribution='gaussian' and variance is omitted, variance == mean.""" + cfg = RVConfig(mean=12.5, distribution="gaussian") + assert cfg.variance == pytest.approx(12.5) + + +def test_default_distribution_is_poisson() -> None: + """ + When distribution is omitted, it defaults to 'poisson' and + variance stays None. + """ + cfg = RVConfig(mean=3.3) + assert cfg.distribution == "poisson" + assert cfg.variance is None + + +def test_explicit_variance_kept_for_poisson() -> None: + """If the user supplies variance even for poisson, it is preserved.""" + cfg = RVConfig(mean=4.0, distribution="poisson", variance=2.2) + assert cfg.variance == pytest.approx(2.2) + + +def test_invalid_distribution_raises() -> None: + """Supplying a non-supported distribution literal raises ValidationError.""" + with pytest.raises(ValidationError) as excinfo: + RVConfig(mean=5.0, distribution="not_a_dist") + + errors = excinfo.value.errors() + # Only assert there is at least one error for the 'distribution' field: + assert any(e["loc"] == ("distribution",) for e in errors) + +# -------------------------------------------------------------------------- +# TEST FIELD VALIDATOR USER SAMPLING WINDOW +# -------------------------------------------------------------------------- + +def test_default_user_sampling_window() -> None: + """When user_sampling_window is omitted, it defaults to USER_SAMPLING_WINDOW.""" + inp = SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + ) + assert inp.user_sampling_window == TimeDefaults.USER_SAMPLING_WINDOW.value + + +def test_explicit_user_sampling_window_kept() -> None: + """An explicit user_sampling_window value is preserved unchanged.""" + 
custom_window = 30 + inp = SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + user_sampling_window=custom_window, + ) + assert inp.user_sampling_window == custom_window + + +def test_user_sampling_window_not_int_raises() -> None: + """A non-integer user_sampling_window raises a ValidationError.""" + with pytest.raises(ValidationError) as excinfo: + + SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + user_sampling_window="not-an-int", + ) + + errors = excinfo.value.errors() + assert any(err["loc"] == ("user_sampling_window",) for err in errors) + + # Pydantic v2 wording + assert any("valid integer" in err["msg"] for err in errors) + + + +def test_user_sampling_window_above_max_raises() -> None: + """ + Passing user_sampling_window > MAX_USER_SAMPLING_WINDOW + must raise a ValidationError. 
+ """ + too_large = TimeDefaults.MAX_USER_SAMPLING_WINDOW.value + 1 + with pytest.raises(ValidationError) as excinfo: + SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + user_sampling_window=too_large, + ) + + errors = excinfo.value.errors() + assert any(err["loc"] == ("user_sampling_window",) for err in errors) + + expected_snippet = ( + f"less than or equal to {TimeDefaults.MAX_USER_SAMPLING_WINDOW.value}" + ) + assert any(expected_snippet in err["msg"] for err in errors) + + + +# -------------------------------------------------------------------------- +# TEST FIELD VALIDATOR TOTAL SIMULATION TIME +# -------------------------------------------------------------------------- + +def test_default_total_simulation_time() -> None: + """When total_simulation_time is omitted, it defaults to SIMULATION_TIME.""" + inp = SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + ) + assert inp.total_simulation_time == TimeDefaults.SIMULATION_TIME.value + + +def test_explicit_total_simulation_time_kept() -> None: + """An explicit total_simulation_time value is preserved unchanged.""" + custom_time = 3_000 + inp = SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + total_simulation_time=custom_time, + ) + assert inp.total_simulation_time == custom_time + + +def test_total_simulation_time_not_int_raises() -> None: + """A non-integer total_simulation_time raises a ValidationError.""" + with pytest.raises(ValidationError) as excinfo: + + SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + total_simulation_time="three thousand", + ) + + errors = excinfo.value.errors() + 
assert any(err["loc"] == ("total_simulation_time",) for err in errors) + + # Pydantic v2 wording: “Input should be a valid integer” + assert any("valid integer" in err["msg"] for err in errors) + + + +def test_total_simulation_time_below_minimum_raises() -> None: + """ + Passing total_simulation_time < MIN_SIMULATION_TIME + must raise a ValidationError. + """ + too_small = TimeDefaults.MIN_SIMULATION_TIME.value - 1 + with pytest.raises(ValidationError) as excinfo: + SimulationInput( + avg_active_users={"mean": 1.0, "distribution": "poisson"}, + avg_request_per_minute_per_user={"mean": 1.0, "distribution": "poisson"}, + total_simulation_time=too_small, + ) + + errors = excinfo.value.errors() + # there is at least one error on the correct field + assert any(err["loc"] == ("total_simulation_time",) for err in errors) + + expected_snippet = ( + f"greater than or equal to {TimeDefaults.MIN_SIMULATION_TIME.value}" + ) + assert any(expected_snippet in err["msg"] for err in errors) + + diff --git a/tests/unit/simulation/test_requests_generator.py b/tests/unit/simulation/test_requests_generator.py index e7fad85..0bf5872 100644 --- a/tests/unit/simulation/test_requests_generator.py +++ b/tests/unit/simulation/test_requests_generator.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from app.config.constants import TimeDefaults from app.core.simulation.requests_generator import requests_generator from app.core.simulation.simulation_run import run_simulation from app.schemas.simulation_input import SimulationInput @@ -28,7 +29,7 @@ def base_input() -> SimulationInput: return SimulationInput( avg_active_users={"mean": 1.0}, avg_request_per_minute_per_user={"mean": 2.0}, - total_simulation_time=120, + total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME.value, ) # -------------------------------------------------------------- @@ -73,7 +74,7 @@ def test_requests_generator_dispatches_to_correct_sampler( input_data = SimulationInput( avg_active_users={"mean": 1.0, "distribution": dist}, 
avg_request_per_minute_per_user={"mean": 1.0}, - total_simulation_time=120, # Must be > 60 to pass schema validation + total_simulation_time=TimeDefaults.MIN_SIMULATION_TIME.value, ) rng = np.random.default_rng(0) gen = requests_generator(input_data, rng=rng) diff --git a/tests/unit/simulation/test_simulation_input.py b/tests/unit/simulation/test_simulation_input.py deleted file mode 100644 index 20a32d9..0000000 --- a/tests/unit/simulation/test_simulation_input.py +++ /dev/null @@ -1,93 +0,0 @@ -import pytest -from pydantic import ValidationError - -from app.schemas.simulation_input import RVConfig, SimulationInput - - -def test_normal_sets_variance_to_mean() -> None: - """When distribution='normal' and variance is omitted, variance == mean.""" - cfg = RVConfig(mean=10, distribution="normal") - assert cfg.variance == 10.0 - - -def test_poisson_keeps_variance_none() -> None: - """When distribution='poisson' and variance is omitted, variance stays None.""" - cfg = RVConfig(mean=5, distribution="poisson") - assert cfg.variance is None - - -def test_explicit_variance_is_preserved() -> None: - """If the user supplies variance explicitly, it is preserved unchanged.""" - cfg = RVConfig(mean=8, distribution="normal", variance=4) - assert cfg.variance == 4.0 - - -def test_mean_must_be_numeric() -> None: - """A non-numeric mean raises a ValidationError with our custom message.""" - with pytest.raises(ValidationError) as excinfo: - RVConfig(mean="not a number", distribution="poisson") - - # Check that at least one error refers to the 'mean' field - assert any(err["loc"] == ("mean",) for err in excinfo.value.errors()) - assert "mean must be a number" in excinfo.value.errors()[0]["msg"] - - -def test_missing_mean_field() -> None: - """Omitting the mean field raises a 'field required' ValidationError.""" - with pytest.raises(ValidationError) as excinfo: - # Using model_validate avoids the constructor signature check - RVConfig.model_validate({"distribution": "normal"}) - - 
assert any( - err["loc"] == ("mean",) and err["type"] == "missing" - for err in excinfo.value.errors() - ) - -def test_gaussian_sets_variance_to_mean() -> None: - """When distribution='gaussian' and variance is omitted, variance == mean.""" - cfg = RVConfig(mean=12.5, distribution="gaussian") - assert cfg.variance == pytest.approx(12.5) - - -def test_default_distribution_is_poisson() -> None: - """ - When distribution is omitted, it defaults to 'poisson' and - variance stays None. - """ - cfg = RVConfig(mean=3.3) - assert cfg.distribution == "poisson" - assert cfg.variance is None - - -def test_explicit_variance_kept_for_poisson() -> None: - """If the user supplies variance even for poisson, it is preserved.""" - cfg = RVConfig(mean=4.0, distribution="poisson", variance=2.2) - assert cfg.variance == pytest.approx(2.2) - - -def test_invalid_distribution_raises() -> None: - """Supplying a non-supported distribution literal raises ValidationError.""" - with pytest.raises(ValidationError) as excinfo: - RVConfig(mean=5.0, distribution="not_a_dist") - - errors = excinfo.value.errors() - # Only assert there is at least one error for the 'distribution' field: - assert any(e["loc"] == ("distribution",) for e in errors) - - -def test_simulation_time_below_minimum_raises() -> None: - """ - Passing total_simulation_time <= 60 must raise a ValidationError, - because the minimum allowed simulation time is 61 seconds. 
- """ - with pytest.raises(ValidationError) as excinfo: - SimulationInput( - avg_active_users={"mean": 1.0}, - avg_request_per_minute_per_user={"mean": 1.0}, - total_simulation_time=60, # exactly at the boundary - ) - errors = excinfo.value.errors() - assert any( - err["loc"] == ("total_simulation_time",) and "at least 60 seconds" in err["msg"] - for err in errors - ) From df3e505579dbcccadf8e479eb212cdcf786d2f37 Mon Sep 17 00:00:00 2001 From: Gioele Botta <157820569+GioeleB00@users.noreply.github.com> Date: Thu, 10 Jul 2025 17:30:00 +0200 Subject: [PATCH 5/5] Update tests/unit/sampler/test_poisson_posson.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/sampler/test_poisson_posson.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/sampler/test_poisson_posson.py b/tests/unit/sampler/test_poisson_posson.py index 0ea539e..c0a2f3e 100644 --- a/tests/unit/sampler/test_poisson_posson.py +++ b/tests/unit/sampler/test_poisson_posson.py @@ -114,6 +114,6 @@ def test_cumulative_time_never_exceeds_horizon(base_input: SimulationInput) -> N gaps = list(poisson_poisson_sampling(base_input, rng=rng)) cum_time = math.fsum(gaps) - # Even if the virtual clock può saltare quando λ == 0, - # i gap sommati non devono mai superare l'orizzonte. + # Even if the virtual clock can jump when λ == 0, + # the summed gaps must never exceed the horizon. assert cum_time < base_input.total_simulation_time