Commit 60b72b1
Introduce Content Safety evaluators
1 parent 31c0ebc commit 60b72b1

File tree: 41 files changed, +2913 −347 lines


src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/README.md

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 
 * [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
 * [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Equivalence and Groundedness.
-* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains evaluators that can be used to evaluate the content safety of AI responses in your projects including Hate and Fairness, Self-Harm, Violence etc.
+* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Content Safety service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
 * [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/EquivalenceEvaluatorContext.cs

Lines changed: 2 additions & 1 deletion

@@ -9,7 +9,8 @@
 namespace Microsoft.Extensions.AI.Evaluation.Quality;
 
 /// <summary>
-/// Contextual information required to evaluate the 'Equivalence' of a response.
+/// Contextual information that the <see cref="EquivalenceEvaluator"/> uses to evaluate the 'Equivalence' of a
+/// response.
 /// </summary>
 /// <param name="groundTruth">
 /// The ground truth response against which the response that is being evaluated is compared.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/GroundednessEvaluatorContext.cs

Lines changed: 2 additions & 1 deletion

@@ -9,7 +9,8 @@
 namespace Microsoft.Extensions.AI.Evaluation.Quality;
 
 /// <summary>
-/// Contextual information required to evaluate the 'Groundedness' of a response.
+/// Contextual information that the <see cref="GroundednessEvaluator"/> uses to evaluate the 'Groundedness' of a
+/// response.
 /// </summary>
 /// <param name="groundingContext">
 /// Contextual information against which the 'Groundedness' of a response is evaluated.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/README.md

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 
 * [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
 * [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Equivalence and Groundedness.
-* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains evaluators that can be used to evaluate the content safety of AI responses in your projects including Hate and Fairness, Self-Harm, Violence etc.
+* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Content Safety service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
 * [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs

Lines changed: 7 additions & 5 deletions

@@ -207,31 +207,33 @@ void UpdateResult()
     const string Rationales = "Rationales";
     const string Separator = "; ";
 
-    var commonMetadata = new Dictionary<string, string> { ["rtc_evaluation_duration"] = duration };
+    var commonMetadata = new Dictionary<string, string>();
 
     if (!string.IsNullOrWhiteSpace(evaluationResponse.ModelId))
     {
-        commonMetadata["rtc_evaluation_model_used"] = evaluationResponse.ModelId!;
+        commonMetadata["rtc-evaluation-model-used"] = evaluationResponse.ModelId!;
     }
 
     if (evaluationResponse.Usage is UsageDetails usage)
     {
         if (usage.InputTokenCount is not null)
         {
-            commonMetadata["rtc_evaluation_input_tokens_used"] = $"{usage.InputTokenCount}";
+            commonMetadata["rtc-evaluation-input-tokens-used"] = $"{usage.InputTokenCount}";
         }
 
         if (usage.OutputTokenCount is not null)
         {
-            commonMetadata["rtc_evaluation_output_tokens_used"] = $"{usage.OutputTokenCount}";
+            commonMetadata["rtc-evaluation-output-tokens-used"] = $"{usage.OutputTokenCount}";
         }
 
         if (usage.TotalTokenCount is not null)
        {
-            commonMetadata["rtc_evaluation_total_tokens_used"] = $"{usage.TotalTokenCount}";
+            commonMetadata["rtc-evaluation-total-tokens-used"] = $"{usage.TotalTokenCount}";
        }
     }
 
+    commonMetadata["rtc-evaluation-duration"] = duration;
+
     NumericMetric relevance = result.Get<NumericMetric>(RelevanceMetricName);
     relevance.Value = rating.Relevance;
     relevance.Interpretation = relevance.InterpretScore();

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/SingleNumericMetricEvaluator.cs

Lines changed: 5 additions & 5 deletions

@@ -80,24 +80,24 @@ await chatConfiguration.ChatClient.GetResponseAsync(
 
     if (!string.IsNullOrWhiteSpace(evaluationResponse.ModelId))
     {
-        metric.AddOrUpdateMetadata(name: "evaluation_model_used", value: evaluationResponse.ModelId!);
+        metric.AddOrUpdateMetadata(name: "evaluation-model-used", value: evaluationResponse.ModelId!);
     }
 
     if (evaluationResponse.Usage is UsageDetails usage)
     {
         if (usage.InputTokenCount is not null)
         {
-            metric.AddOrUpdateMetadata(name: "evaluation_input_tokens_used", value: $"{usage.InputTokenCount}");
+            metric.AddOrUpdateMetadata(name: "evaluation-input-tokens-used", value: $"{usage.InputTokenCount}");
         }
 
         if (usage.OutputTokenCount is not null)
         {
-            metric.AddOrUpdateMetadata(name: "evaluation_output_tokens_used", value: $"{usage.OutputTokenCount}");
+            metric.AddOrUpdateMetadata(name: "evaluation-output-tokens-used", value: $"{usage.OutputTokenCount}");
         }
 
         if (usage.TotalTokenCount is not null)
         {
-            metric.AddOrUpdateMetadata(name: "evaluation_total_tokens_used", value: $"{usage.TotalTokenCount}");
+            metric.AddOrUpdateMetadata(name: "evaluation-total-tokens-used", value: $"{usage.TotalTokenCount}");
         }
     }
 
@@ -126,7 +126,7 @@ await chatConfiguration.ChatClient.GetResponseAsync(
     {
         stopwatch.Stop();
         string duration = $"{stopwatch.Elapsed.TotalSeconds.ToString("F2", CultureInfo.InvariantCulture)} s";
-        metric.AddOrUpdateMetadata(name: "evaluation_duration", value: duration);
+        metric.AddOrUpdateMetadata(name: "evaluation-duration", value: duration);
     }
 }
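The hunks in this file and in RelevanceTruthAndCompletenessEvaluator.cs perform the same mechanical rename of metric metadata keys from snake_case to kebab-case. A minimal sketch of that mapping (in TypeScript; the `toKebab` helper is illustrative and not part of the library, and the key names are taken directly from the diffs) shows the new spellings that any downstream consumer reading these keys would need to adopt:

```typescript
// Illustrative helper: this commit's renames are a straight underscore-to-hyphen substitution.
function toKebab(key: string): string {
  return key.split("_").join("-");
}

// Metadata keys as they appeared before this commit (copied from the "-" sides of the diffs).
const oldKeys = [
  "rtc_evaluation_model_used",
  "rtc_evaluation_input_tokens_used",
  "rtc_evaluation_output_tokens_used",
  "rtc_evaluation_total_tokens_used",
  "rtc_evaluation_duration",
  "evaluation_model_used",
  "evaluation_duration",
];

// Each entry maps to the exact key that appears on the "+" side of the diffs.
const renamed = oldKeys.map(toKebab);
console.log(renamed);
```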

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/README.md

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 
 * [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
 * [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Equivalence and Groundedness.
-* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains evaluators that can be used to evaluate the content safety of AI responses in your projects including Hate and Fairness, Self-Harm, Violence etc.
+* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Content Safety service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
 * [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/README.md

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 
 * [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
 * [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Equivalence and Groundedness.
-* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains evaluators that can be used to evaluate the content safety of AI responses in your projects including Hate and Fairness, Self-Harm, Violence etc.
+* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Content Safety service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
 * [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
 * [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx

Lines changed: 2 additions & 2 deletions

@@ -42,7 +42,7 @@ const useCardStyles = makeStyles({
     padding: '.75rem',
     border: `1px solid ${tokens.colorNeutralStroke2}`,
     borderRadius: '4px',
-    width: '8rem',
+    width: '12.5rem',
     cursor: 'pointer',
     transition: 'box-shadow 0.2s ease-in-out, outline 0.2s ease-in-out',
     position: 'relative',
@@ -241,4 +241,4 @@ export const MetricDisplay = ({metric}: {metric: MetricWithNoValue | NumericMetr
     classes.metricPill,
   );
   return (<div className={pillClass}><span className={fg}>{metricValue}</span></div>);
-};
\ No newline at end of file
+};
Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Microsoft.Extensions.AI.Evaluation.Safety;
+
+/// <summary>
+/// An <see cref="IEvaluator"/> that utilizes the Azure AI Content Safety service to evaluate code completion responses
+/// produced by an AI model for the presence of vulnerable code.
+/// </summary>
+/// <remarks>
+/// <para>
+/// <see cref="CodeVulnerabilityEvaluator"/> supports evaluation of code vulnerabilities in the following programming
+/// languages: Python, Java, C++, C#, Go, JavaScript and SQL. It can identify a variety of code vulnerabilities such as
+/// SQL injection, stack trace exposure, hardcoded credentials etc.
+/// </para>
+/// <para>
+/// <see cref="CodeVulnerabilityEvaluator"/> returns a <see cref="BooleanMetric"/> with a value of
+/// <see langword="true"/> indicating the presence of vulnerable code in the evaluated response, and a value of
+/// <see langword="false"/> indicating the absence of vulnerable code.
+/// </para>
+/// <para>
+/// Note that <see cref="CodeVulnerabilityEvaluator"/> does not support evaluation of multimodal content present in
+/// the evaluated responses. Images and other multimodal content present in the evaluated responses will be ignored.
+/// Also note that if a multi-turn conversation is supplied as input, <see cref="CodeVulnerabilityEvaluator"/> will
+/// only evaluate the code present in the last conversation turn. Any code present in the previous conversation turns
+/// will be ignored.
+/// </para>
+/// </remarks>
+/// <param name="contentSafetyServiceConfiguration">
+/// Specifies the Azure AI project that should be used and credentials that should be used when this
+/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform
+/// evaluations.
+/// </param>
+public sealed class CodeVulnerabilityEvaluator(ContentSafetyServiceConfiguration contentSafetyServiceConfiguration)
+    : ContentSafetyEvaluator(
+        contentSafetyServiceConfiguration,
+        contentSafetyServiceAnnotationTask: "code vulnerability",
+        evaluatorName: nameof(CodeVulnerabilityEvaluator))
+{
+    /// <summary>
+    /// Gets the <see cref="EvaluationMetric.Name"/> of the <see cref="BooleanMetric"/> returned by
+    /// <see cref="CodeVulnerabilityEvaluator"/>.
+    /// </summary>
+    public static string CodeVulnerabilityMetricName => "Code Vulnerability";
+
+    /// <inheritdoc/>
+    public override IReadOnlyCollection<string> EvaluationMetricNames => [CodeVulnerabilityMetricName];
+
+    /// <inheritdoc/>
+    public override async ValueTask<EvaluationResult> EvaluateAsync(
+        IEnumerable<ChatMessage> messages,
+        ChatResponse modelResponse,
+        ChatConfiguration? chatConfiguration = null,
+        IEnumerable<EvaluationContext>? additionalContext = null,
+        CancellationToken cancellationToken = default)
+    {
+        const string CodeVulnerabilityContentSafetyServiceMetricName = "code_vulnerability";
+
+        EvaluationResult result =
+            await EvaluateContentSafetyAsync(
+                messages,
+                modelResponse,
+                contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.ContextCompletion.ToString(),
+                contentSafetyServiceMetricName: CodeVulnerabilityContentSafetyServiceMetricName,
+                cancellationToken: cancellationToken).ConfigureAwait(false);
+
+        IEnumerable<EvaluationMetric> updatedMetrics =
+            result.Metrics.Values.Select(
+                metric =>
+                {
+                    if (metric.Name == CodeVulnerabilityContentSafetyServiceMetricName)
+                    {
+                        metric.Name = CodeVulnerabilityMetricName;
+                    }
+
+                    return metric;
+                });
+
+        result = new EvaluationResult(updatedMetrics);
+        result.Interpret(metric => metric is BooleanMetric booleanMetric ? booleanMetric.InterpretScore() : null);
+        return result;
+    }
+}
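The `EvaluateAsync` override above maps the service-level metric name (`code_vulnerability`) to the public display name (`Code Vulnerability`) before interpreting the boolean result. That rename-and-interpret pass can be sketched as follows in TypeScript; the types and the interpretation strings here are simplified stand-ins for illustration, not the actual library API:

```typescript
// Simplified stand-in for the evaluation metric type used in the diff above.
interface Metric {
  name: string;
  value?: boolean;
  interpretation?: string;
}

const SERVICE_METRIC_NAME = "code_vulnerability"; // name returned by the service
const PUBLIC_METRIC_NAME = "Code Vulnerability";  // name exposed by the evaluator

// Rename the service metric to its public name, then attach an interpretation
// of the boolean value (the real evaluator calls BooleanMetric.InterpretScore()).
function renameAndInterpret(metrics: Metric[]): Metric[] {
  return metrics.map((m) => {
    if (m.name === SERVICE_METRIC_NAME) {
      m.name = PUBLIC_METRIC_NAME;
    }
    // A true value indicates vulnerable code was found in the response.
    m.interpretation = m.value
      ? "vulnerable code detected"
      : "no vulnerable code detected";
    return m;
  });
}

console.log(renameAndInterpret([{ name: "code_vulnerability", value: false }]));
```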
