Skip to content

Commit 8fd1fd6

Browse files
committed
feat: Fix and run evaluation.py
1 parent b83aa3f commit 8fd1fd6

File tree

1 file changed

+61
-0
lines changed
  • 2025/generative-ai-agent-dev-deploy-handson/agents/app

1 file changed

+61
-0
lines changed
Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
"""Run a pointwise Gen AI evaluation on sample responses with Vertex AI.

Defines a custom pointwise "text quality" metric (two criteria: fluency and
entertaining), builds a tiny three-row evaluation dataset, runs an EvalTask
against it under the configured experiment, and prints the result.

Requires Google Cloud credentials with access to PROJECT_ID; the evaluate()
call hits the Vertex AI evaluation service.
"""

import pandas as pd

import vertexai
from vertexai.evaluation import EvalTask, PointwiseMetric, PointwiseMetricPromptTemplate
from google.cloud import aiplatform  # NOTE(review): unused in this file — confirm before removing
from agent import root_agent  # NOTE(review): unused in this file — confirm before removing

# Vertex AI project / experiment configuration.
PROJECT_ID = "gossy-workstations"
LOCATION = "us-central1"
EXPERIMENT_NAME = "experiment-name"

vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

# Define a pointwise metric with two criteria: Fluency and Entertaining.
custom_text_quality = PointwiseMetric(
    metric="custom_text_quality",
    metric_prompt_template=PointwiseMetricPromptTemplate(
        criteria={
            "fluency": (
                "Sentences flow smoothly and are easy to read, avoiding awkward"
                " phrasing or run-on sentences. Ideas and sentences connect"
                " logically, using transitions effectively where needed."
            ),
            "entertaining": (
                "Short, amusing text that incorporates emojis, exclamations and"
                " questions to convey quick and spontaneous communication and"
                " diversion."
            ),
        },
        # Rating scale the judge model must use: 1 (good) / 0 (medium) / -1 (poor).
        rating_rubric={
            "1": "The response performs well on both criteria.",
            "0": "The response is somewhat aligned with both criteria",
            "-1": "The response falls short on both criteria",
        },
    ),
)

# Sample responses spanning the quality spectrum the metric should separate.
responses = [
    # An example of good custom_text_quality
    "Life is a rollercoaster, full of ups and downs, but it's the thrill that keeps us coming back for more!",
    # An example of medium custom_text_quality
    "The weather is nice today, not too hot, not too cold.",
    # An example of poor custom_text_quality
    "The weather is, you know, whatever.",
]

# The evaluation service expects a "response" column in the dataset.
eval_dataset = pd.DataFrame({
    "response": responses,
})

eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[custom_text_quality],
    experiment=EXPERIMENT_NAME,
)

# Runs the pointwise evaluation remotely and prints the aggregated result.
pointwise_result = eval_task.evaluate()
print(pointwise_result)

0 commit comments

Comments
 (0)