1+ import pandas as pd
2+
3+ import vertexai
4+ from vertexai .evaluation import EvalTask , PointwiseMetric , PointwiseMetricPromptTemplate
5+ from google .cloud import aiplatform
6+ from agent import root_agent
7+
# Google Cloud project and region passed to vertexai.init() below.
PROJECT_ID = "gossy-workstations"
LOCATION = "us-central1"
# Experiment name passed to EvalTask(experiment=...) below.
EXPERIMENT_NAME = "experiment-name"
11+
# Initialise the Vertex AI SDK with the project and region configured above.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
)
16+
# Define a pointwise metric with two criteria: fluency and entertaining.
# The rating rubric offers three ratings ("1", "0", "-1"), each describing
# how well a single response does on both criteria together.
custom_text_quality = PointwiseMetric(
    metric="custom_text_quality",
    metric_prompt_template=PointwiseMetricPromptTemplate(
        criteria={
            "fluency": (
                "Sentences flow smoothly and are easy to read, avoiding awkward"
                " phrasing or run-on sentences. Ideas and sentences connect"
                " logically, using transitions effectively where needed."
            ),
            "entertaining": (
                "Short, amusing text that incorporates emojis, exclamations and"
                " questions to convey quick and spontaneous communication and"
                " diversion."
            ),
        },
        rating_rubric={
            "1": "The response performs well on both criteria.",
            "0": "The response is somewhat aligned with both criteria",
            "-1": "The response falls short on both criteria",
        },
    ),
)
40+
# Pre-written responses to be scored, one per expected quality level.
responses = [
    # An example of good custom_text_quality
    "Life is a rollercoaster, full of ups and downs, but it's the thrill that keeps us coming back for more!",
    # An example of medium custom_text_quality
    "The weather is nice today, not too hot, not too cold.",
    # An example of poor custom_text_quality
    "The weather is, you know, whatever.",
]

# Single-column dataset: each row holds one response under the "response" key.
eval_dataset = pd.DataFrame({
    "response": responses,
})
53+
# Bundle the dataset and the custom metric into one evaluation task,
# associated with the experiment named by EXPERIMENT_NAME.
eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[custom_text_quality],
    experiment=EXPERIMENT_NAME,
)
59+
# Run the evaluation; evaluate() is called with no arguments, so only the
# responses already present in the dataset are scored. Print the result.
pointwise_result = eval_task.evaluate()
print(pointwise_result)