[Docs] Apply chaos selectively (#1923)

martintmk · web-flow · commit 7ad08067f4db · 2024-01-25T17:31:41.000Z
diff --git a/.github/wordlist.txt b/.github/wordlist.txt
@@ -1,5 +1,6 @@
 alloc
 apis
+ASP.NET
 async
 azurefunctions
 bcl
@@ -13,6 +14,7 @@ deserialization
 dotnet
 dotnetrocks
 durations
+enricher
 eshoponcontainers
 extensibility
 flurl
@@ -23,16 +25,17 @@ jittered
 json
 loggingpolicy
 markdownsnippets
+middleware
 minver
 moq
 namespace
 natively
 ndc
 nuget
 oss
-pcl
-parallelize
 paas
+parallelize
+pcl
 pluralsight
 pollydocs
 pre
@@ -67,4 +70,3 @@ uwp
 waitandretry
 wpf
 xunit
-enricher
diff --git a/docs/chaos/index.md b/docs/chaos/index.md
@@ -90,3 +90,113 @@ All the strategies' options implement the [`ChaosStrategyOptions`](xref:Polly.Si
 > If both `Enabled` and `EnabledGenerator` are specified then `Enabled` will be ignored.
 
 [simmy]: https://github.com/Polly-Contrib/Simmy
+
+## Patterns
+
+### Inject chaos selectively
+
+You may want to dynamically adjust the frequency and timing of chaos injection. For instance, in pre-production and test environments, it's sensible to consistently inject chaos. This proactive approach helps in preparing for potential failures. In production environments, however, you may prefer to limit chaos to certain users and tenants, ensuring that regular users remain unaffected. The chaos API offers the flexibility needed to manage these varying scenarios.
+
+Additionally, you have the option to dynamically alter the injection rate and simulate extreme scenarios by setting the injection rate to *1.0 (100%)*. Exercise caution when applying this high rate, restricting it to a subset of tenants and users to avoid rendering the system unusable for regular users.
+
+The following example illustrates how to configure chaos strategies accordingly:
+
+<!-- snippet: chaos-selective -->
+```cs
+services.AddResiliencePipeline("chaos-pipeline", (builder, context) =>
+{
+    var environment = context.ServiceProvider.GetRequiredService<IHostEnvironment>();
+
+    builder.AddChaosFault(new ChaosFaultStrategyOptions
+    {
+        EnabledGenerator = args =>
+        {
+            // Enable chaos in development and staging environments.
+            if (environment.IsDevelopment() || environment.IsStaging())
+            {
+                return ValueTask.FromResult(true);
+            }
+
+            // Enable chaos for specific users or tenants, even in production environments.
+            if (ShouldEnableChaos(args.Context))
+            {
+                return ValueTask.FromResult(true);
+            }
+
+            return ValueTask.FromResult(false);
+        },
+        InjectionRateGenerator = args =>
+        {
+            if (environment.IsStaging())
+            {
+                // 1% chance of failure on staging environments.
+                return ValueTask.FromResult(0.01);
+            }
+
+            if (environment.IsDevelopment())
+            {
+                // 5% chance of failure on development environments.
+                return ValueTask.FromResult(0.05);
+            }
+
+            // The context can carry information to help determine the injection rate.
+            // For instance, in production environments, you might have certain test users or tenants
+            // for whom you wish to inject chaos.
+            if (ResolveInjectionRate(args.Context, out double injectionRate))
+            {
+                return ValueTask.FromResult(injectionRate);
+            }
+
+            // No chaos on production environments.
+            return ValueTask.FromResult(0.0);
+        },
+        FaultGenerator = new FaultGenerator()
+            .AddException<TimeoutException>()
+            .AddException<HttpRequestException>()
+    });
+});
+```
+<!-- endSnippet -->
+
+We suggest encapsulating the chaos decisions and injection rate in a shared abstraction, such as an `IChaosManager` interface:
+
+<!-- snippet: chaos-manager -->
+```cs
+public interface IChaosManager
+{
+    bool IsChaosEnabled(ResilienceContext context);
+
+    double GetInjectionRate(ResilienceContext context);
+}
+```
+<!-- endSnippet -->
+
+This approach allows you to consistently apply and manage chaos-related settings across various chaos strategies by reusing `IChaosManager`. By centralizing the logic for enabling chaos and determining injection rates, you can ensure uniformity and ease of maintenance across your application:
+
+<!-- snippet: chaos-selective-manager -->
+```cs
+services.AddResiliencePipeline("chaos-pipeline", (builder, context) =>
+{
+    var chaosManager = context.ServiceProvider.GetRequiredService<IChaosManager>();
+
+    builder
+        .AddChaosFault(new ChaosFaultStrategyOptions
+        {
+            EnabledGenerator = args => ValueTask.FromResult(chaosManager.IsChaosEnabled(args.Context)),
+            InjectionRateGenerator = args => ValueTask.FromResult(chaosManager.GetInjectionRate(args.Context)),
+            FaultGenerator = new FaultGenerator()
+                .AddException<TimeoutException>()
+                .AddException<HttpRequestException>()
+        })
+        .AddChaosLatency(new ChaosLatencyStrategyOptions
+        {
+            EnabledGenerator = args => ValueTask.FromResult(chaosManager.IsChaosEnabled(args.Context)),
+            InjectionRateGenerator = args => ValueTask.FromResult(chaosManager.GetInjectionRate(args.Context)),
+            Latency = TimeSpan.FromSeconds(60)
+        });
+});
+```
+<!-- endSnippet -->
+
+> [!NOTE]
+> An alternative method involves using [`Microsoft.Extensions.AsyncState`](https://www.nuget.org/packages/Microsoft.Extensions.AsyncState) for storing information relevant to chaos injection decisions. This can be particularly useful in frameworks like ASP.NET Core. For instance, you could implement a middleware that retrieves user information from `HttpContext`, assesses the user type, and then stores this data in `IAsyncContext<ChaosUser>`. Subsequently, `IChaosManager` can access `IAsyncContext<ChaosUser>` to retrieve this information. This approach eliminates the need to manually insert such data into `ResilienceContext` for each call within the resilience pipeline, thereby streamlining the process.
diff --git a/src/Snippets/Docs/Chaos.Index.cs b/src/Snippets/Docs/Chaos.Index.cs
@@ -1,7 +1,11 @@
 using System.Net.Http;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Hosting;
 using Polly.CircuitBreaker;
 using Polly.Retry;
 using Polly.Simmy;
+using Polly.Simmy.Fault;
+using Polly.Simmy.Latency;
 
 namespace Snippets.Docs;
 
@@ -35,5 +39,112 @@ public static void Usage()
         #endregion
     }
 
+    public static void ApplyChaosSelectively(IServiceCollection services)
+    {
+        #region chaos-selective
+
+        services.AddResiliencePipeline("chaos-pipeline", (builder, context) =>
+        {
+            var environment = context.ServiceProvider.GetRequiredService<IHostEnvironment>();
+
+            builder.AddChaosFault(new ChaosFaultStrategyOptions
+            {
+                EnabledGenerator = args =>
+                {
+                    // Enable chaos in development and staging environments.
+                    if (environment.IsDevelopment() || environment.IsStaging())
+                    {
+                        return ValueTask.FromResult(true);
+                    }
+
+                    // Enable chaos for specific users or tenants, even in production environments.
+                    if (ShouldEnableChaos(args.Context))
+                    {
+                        return ValueTask.FromResult(true);
+                    }
+
+                    return ValueTask.FromResult(false);
+                },
+                InjectionRateGenerator = args =>
+                {
+                    if (environment.IsStaging())
+                    {
+                        // 1% chance of failure on staging environments.
+                        return ValueTask.FromResult(0.01);
+                    }
+
+                    if (environment.IsDevelopment())
+                    {
+                        // 5% chance of failure on development environments.
+                        return ValueTask.FromResult(0.05);
+                    }
+
+                    // The context can carry information to help determine the injection rate.
+                    // For instance, in production environments, you might have certain test users or tenants
+                    // for whom you wish to inject chaos.
+                    if (ResolveInjectionRate(args.Context, out double injectionRate))
+                    {
+                        return ValueTask.FromResult(injectionRate);
+                    }
+
+                    // No chaos on production environments.
+                    return ValueTask.FromResult(0.0);
+                },
+                FaultGenerator = new FaultGenerator()
+                    .AddException<TimeoutException>()
+                    .AddException<HttpRequestException>()
+            });
+        });
+
+        #endregion
+    }
+
+    public static void ApplyChaosSelectivelyWithChaosManager(IServiceCollection services)
+    {
+        #region chaos-selective-manager
+
+        services.AddResiliencePipeline("chaos-pipeline", (builder, context) =>
+        {
+            var chaosManager = context.ServiceProvider.GetRequiredService<IChaosManager>();
+
+            builder
+                .AddChaosFault(new ChaosFaultStrategyOptions
+                {
+                    EnabledGenerator = args => ValueTask.FromResult(chaosManager.IsChaosEnabled(args.Context)),
+                    InjectionRateGenerator = args => ValueTask.FromResult(chaosManager.GetInjectionRate(args.Context)),
+                    FaultGenerator = new FaultGenerator()
+                        .AddException<TimeoutException>()
+                        .AddException<HttpRequestException>()
+                })
+                .AddChaosLatency(new ChaosLatencyStrategyOptions
+                {
+                    EnabledGenerator = args => ValueTask.FromResult(chaosManager.IsChaosEnabled(args.Context)),
+                    InjectionRateGenerator = args => ValueTask.FromResult(chaosManager.GetInjectionRate(args.Context)),
+                    Latency = TimeSpan.FromSeconds(60)
+                });
+        });
+
+        #endregion
+    }
+
+    private static bool ResolveInjectionRate(ResilienceContext context, out double injectionRate)
+    {
+        injectionRate = 0.0;
+        return false;
+    }
+
+    private static bool ShouldEnableChaos(ResilienceContext context) => true;
+
     private static ValueTask RestartRedisAsync(CancellationToken cancellationToken) => ValueTask.CompletedTask;
+
+    #region  chaos-manager
+
+    public interface IChaosManager
+    {
+        bool IsChaosEnabled(ResilienceContext context);
+
+        double GetInjectionRate(ResilienceContext context);
+    }
+
+    #endregion
 }