awwaiid
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 295 additions & 48 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 6 additions & 1 deletion
diff --git a/prompts/general.json b/prompts/general.json
Lines changed: 1 addition & 1 deletion
diff --git a/run_eval.sh b/run_eval.sh
Lines changed: 36 additions & 13 deletions
@@ -18,7 +18,8 @@ image = "0.25.5"
 byteorder = "1.4"
 clap = { version = "4.0", features = ["derive"] }
 evdev = { version = "0.13", features = ["tokio"] }
-resvg = "0.45"
+resvg = "0.47"
+svg2polylines = "0.8"
 dotenv = "0.15"
 imageproc = "0.25.0"
 rust-embed = { version = "8.5.0", features = ["include-exclude", "compression"] }
@@ -39,6 +40,10 @@ path = "src/lib.rs"
 name = "ghostwriter"
 path = "src/main.rs"
 
+[[bin]]
+name = "experiment"
+path = "src/bin/experiment.rs"
+
 [profile.release]
 # strip = true  # Automatically strip symbols from the binary.
 # opt-level = "z"  # Optimize for size.
 
@@ -1,4 +1,4 @@
 {
-  "prompt": "You are a helpful assistant named Ghostwriter. You live inside of a reMarkable brand eInk notepad, which has a 768x1024 px sized screen which can only display grayscale. Your input is the current content of the screen, which may contain content written by the user or previously written by you (the assistant). Look at this content, interpret it, and respond to the content. The content will contain handwritten notes, diagrams, and maybe typewritten text. Respond by calling a tool. Call draw_text to output text which will be sent using simulated keyboard input. Call draw_svg to respond with an SVG drawing which will be drawn on top of the existing content. Try to place the output on the screen at coordinates that make sense. If you need to place text at a very specific location, you should output an SVG instead of keyboard text.",
+  "prompt": "You are a helpful assistant named Ghostwriter. You live inside of a reMarkable brand eInk notepad, which has a 768x1024 px sized screen which can only display grayscale. Your input is the current content of the screen, which may contain content written by the user or previously written by you (the assistant). Look at this content, interpret it, and respond to the content. The content will contain handwritten notes, diagrams, and maybe typewritten text. Respond by calling a tool. Call draw_text to output text which will be sent using simulated keyboard input. Call draw_svg to respond with an SVG drawing which will be drawn on top of the existing content. Try to place the output on the screen at coordinates that make sense. If you need to place text at a very specific location, you should output an SVG instead of keyboard text. If your output is entirely text, then draw_text is better -- it will automatically be positioned below the lowest content on the page.",
   "tools": ["draw_text", "draw_svg"]
 }
@@ -11,22 +11,36 @@ results="$outdir_base/results.md"
 
 scenarios=($(ls evaluations))
 
-attempt_count=3
+attempt_count=1 # Usually 3
 
 declare -A test_case_params
 
-test_case_params["claude_sonnet_latest_no_seg"]="--model claude-3-5-sonnet-latest"
-test_case_params["claude_sonnet_latest_with_seg"]="--apply-segmentation --model claude-3-5-sonnet-latest"
-test_case_params["gpt-4o-mini_no_seg"]="--model gpt-4o-mini"
-test_case_params["gpt-4o_with_seg"]="--apply-segmentation --model gpt-4o-mini"
-test_case_params["gpt-4o-mini_no_seg"]="--model gpt-4o"
-test_case_params["gpt-4o_with_seg"]="--apply-segmentation --model gpt-4o"
-test_case_params["gemini-2-flash_no_seg"]="--model gemini-2.0-flash-exp"
-test_case_params["gemini-2-flash_with_seg"]="--apply-segmentation --model gemini-2.0-flash-exp"
+test_case_params["claude_haiku_4.5"]="--model claude-haiku-4-5"
+test_case_params["claude_sonnet_4.5"]="--model claude-sonnet-4-5"
+test_case_params["claude_opus_4.6"]="--model claude-opus-4-6"
+
+test_case_params["gemini-3-flash"]="--model gemini-3-flash-preview"
+test_case_params["gemini-3-pro"]="--model gemini-3-pro-preview"
+
+test_case_params["gpt-5-nano"]="--model gpt-5-nano"
+test_case_params["gpt-5-mini"]="--model gpt-5-mini"
+test_case_params["gpt-5.2"]="--model gpt-5.2"
+# test_case_params["gpt-5.2-codex"]="--model gpt-5.2-codex"
+
+
+# Old retired test cases
+# test_case_params["claude_sonnet_latest_no_seg"]="--model claude-3-5-sonnet-latest"
+# test_case_params["claude_sonnet_latest_with_seg"]="--apply-segmentation --model claude-3-5-sonnet-latest"
+# test_case_params["gpt-4o-mini_no_seg"]="--model gpt-4o-mini"
+# test_case_params["gpt-4o_with_seg"]="--apply-segmentation --model gpt-4o-mini"
+# test_case_params["gpt-4o-mini_no_seg"]="--model gpt-4o"
+# test_case_params["gpt-4o_with_seg"]="--apply-segmentation --model gpt-4o"
+# test_case_params["gemini-2-flash_no_seg"]="--model gemini-2.0-flash-exp"
+# test_case_params["gemini-2-flash_with_seg"]="--apply-segmentation --model gemini-2.0-flash-exp"
 # test_case_params["gemini-1206-flash_no_seg"]="--model gemini-exp-1206"
 # test_case_params["gemini-1206-flash_with_seg"]="--apply-segmentation --model gemini-exp-1206"
-test_case_params["gemini-1.5-pro_no_seg"]="--model gemini-1.5-pro"
-test_case_params["gemini-1.5-pro_with_seg"]="--apply-segmentation --model gemini-1.5-pro"
+# test_case_params["gemini-1.5-pro_no_seg"]="--model gemini-1.5-pro"
+# test_case_params["gemini-1.5-pro_with_seg"]="--apply-segmentation --model gemini-1.5-pro"
 
 echo "# Ghostwriter evaluation results $datetime" > $results
 echo "" >> $results
@@ -61,6 +75,8 @@ for scenario in "${scenarios[@]}"; do
       # Run the test case
       echo "Running scenario $scenario with params $params attempt $attempt"
 
+      start_time=$(date +%s%N)
+
       ./target/release/ghostwriter \
         --input-png evaluations/$scenario/input.png \
         --save-screenshot $outdir/input.png \
@@ -73,6 +89,10 @@ for scenario in "${scenarios[@]}"; do
         --no-trigger \
         $params
 
+      end_time=$(date +%s%N)
+      elapsed_ms=$(( (end_time - start_time) / 1000000 ))
+      elapsed_s=$(printf "%.1f" "$(echo "$elapsed_ms / 1000" | bc -l)")
+
       # Create a merged image with the new part in red
       if [ -f $outdir/result.png ]; then
         convert \
@@ -94,8 +114,11 @@ for scenario in "${scenarios[@]}"; do
         echo '```' >> $results
       fi
 
-      echo "Sleeping for 10 seconds to avoid rate limiting"
-      sleep 10
+      echo " (${elapsed_s}s)" >> $results
+
+      echo "Completed in ${elapsed_s}s"
+      # echo "Sleeping for 10 seconds to avoid rate limiting"
+      # sleep 10
 
     done
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`	`1`	`{`
`2`		- "prompt": "You are a helpful assistant named Ghostwriter. You live inside of a reMarkable brand eInk notepad, which has a 768x1024 px sized screen which can only display grayscale. Your input is the current content of the screen, which may contain content written by the user or previously written by you (the assistant). Look at this content, interpret it, and respond to the content. The content will contain handwritten notes, diagrams, and maybe typewritten text. Respond by calling a tool. Call draw_text to output text which will be sent using simulated keyboard input. Call draw_svg to respond with an SVG drawing which will be drawn on top of the existing content. Try to place the output on the screen at coordinates that make sense. If you need to place text at a very specific location, you should output an SVG instead of keyboard text.",
	`2`	+ "prompt": "You are a helpful assistant named Ghostwriter. You live inside of a reMarkable brand eInk notepad, which has a 768x1024 px sized screen which can only display grayscale. Your input is the current content of the screen, which may contain content written by the user or previously written by you (the assistant). Look at this content, interpret it, and respond to the content. The content will contain handwritten notes, diagrams, and maybe typewritten text. Respond by calling a tool. Call draw_text to output text which will be sent using simulated keyboard input. Call draw_svg to respond with an SVG drawing which will be drawn on top of the existing content. Try to place the output on the screen at coordinates that make sense. If you need to place text at a very specific location, you should output an SVG instead of keyboard text. If your output is entirely text, then draw_text is better -- it will automatically be positioned below the lowest content on the page.",
`3`	`3`	`"tools": ["draw_text", "draw_svg"]`
`4`	`4`	`}`