From a20c7dbfc8a5cd0dbb4648c58613343484e7e50f Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 13:21:12 +1100 Subject: [PATCH 01/12] fix: Create correct constraint name/error message as per Ecto expectations and naming conventions --- CHANGELOG.md | 15 +++++ lib/ecto/adapters/libsql/connection.ex | 76 ++++++++++++++++++++------ test/ecto_connection_test.exs | 36 +++++++++++- test/ecto_integration_test.exs | 2 +- 4 files changed, 110 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4f88fb0..08aca246 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- **Constraint Error Handling: Index Name Reconstruction (Issue #34)** + - Improved constraint name extraction to reconstruct full index names from SQLite error messages + - Now follows Ecto's naming convention: `table_column1_column2_index` + - **Single-column constraints**: `"UNIQUE constraint failed: users.email"` → `"users_email_index"` (previously just `"email"`) + - **Multi-column constraints**: `"UNIQUE constraint failed: users.slug, users.parent_slug"` → `"users_slug_parent_slug_index"` + - **Backtick handling**: Properly strips trailing backticks appended by libSQL to error messages + - **Enhanced error messages**: Preserves custom index names from enhanced format `(index: custom_index_name)` + - **NOT NULL constraints**: Reconstructs index names following same convention + - Enables accurate `unique_constraint/3` and `check_constraint/3` matching with custom index names in Ecto changesets + - Added comprehensive test coverage for all constraint scenarios (4 new tests) + ## [0.8.0] - 2025-12-17 ### Changed diff --git a/lib/ecto/adapters/libsql/connection.ex 
b/lib/ecto/adapters/libsql/connection.ex index 02906584..c69a385d 100644 --- a/lib/ecto/adapters/libsql/connection.ex +++ b/lib/ecto/adapters/libsql/connection.ex @@ -87,36 +87,80 @@ defmodule Ecto.Adapters.LibSql.Connection do end defp extract_constraint_name(message) do - # Extract constraint name from SQLite error messages + # Extract constraint name from SQLite error messages. # # SQLite only reports column names in constraint errors, not index names. - # However, ecto_libsql enhances error messages to include the actual index name - # by querying SQLite metadata. This allows users to use custom index names in - # their changesets with unique_constraint/3. + # We reconstruct the index name following Ecto's naming convention: + # table_column1_column2_index # - # Enhanced format (when index is found): - # "UNIQUE constraint failed: users.email (index: users_email_index)" -> "users_email_index" + # Examples: + # "UNIQUE constraint failed: users.email" -> "users_email_index" + # "UNIQUE constraint failed: users.slug, users.parent_slug" -> "users_slug_parent_slug_index" + # "NOT NULL constraint failed: users.name" -> "users_name_index" + # "CHECK constraint failed: positive_age" -> "positive_age" # - # Standard formats (fallback to column name): - # "UNIQUE constraint failed: users.email" -> "email" - # "NOT NULL constraint failed: users.name" -> "name" - # "UNIQUE constraint failed: users.slug, users.parent_slug" -> "slug" - # - # First, try to extract the index name from enhanced error messages + # First, try to extract the index name from enhanced error messages (if present) case Regex.run(~r/\(index: ([\w_]+)\)/, message) do [_, index_name] -> # Found enhanced error with actual index name index_name nil -> - # No index name in message, fall back to column name extraction - case Regex.run(~r/constraint failed: (?:\w+\.)?(\w+)/, message) do - [_, name] -> name - _ -> "unknown" + # No index name in message, reconstruct from column names + case 
Regex.run(~r/constraint failed: (.+)$/, message) do + [_, constraint_part] -> + # Strip any trailing backticks that libSQL might add to error messages + cleaned = constraint_part |> String.trim() |> String.trim_trailing("`") + constraint_name_hack(cleaned) + + _ -> + "unknown" end end end + # Reconstruct index names from SQLite constraint error messages. + # This follows Ecto's convention: table_column1_column2_index + defp constraint_name_hack(constraint) do + # Helper to clean backticks from identifiers (libSQL sometimes adds them) + clean = fn s -> String.trim(s, "`") end + + if String.contains?(constraint, ", ") do + # Multi-column constraint: "table.col1, table.col2" -> "table_col1_col2_index" + constraint + |> String.split(", ") + |> Enum.with_index() + |> Enum.map(fn + {table_col, 0} -> + # First column includes table name + table_col |> clean.() |> String.replace(".", "_") + + {table_col, _} -> + # Subsequent columns: only take the column name + table_col + |> clean.() + |> String.split(".") + |> List.last() + |> clean.() + end) + |> Enum.concat(["index"]) + |> Enum.join("_") + else + if String.contains?(constraint, ".") do + # Single column: "table.column" -> "table_column_index" + constraint + |> clean.() + |> String.split(".") + |> Enum.map(clean) + |> Enum.concat(["index"]) + |> Enum.join("_") + else + # No table prefix (e.g., CHECK constraint name): return as-is + clean.(constraint) + end + end + end + ## DDL Generation @impl true diff --git a/test/ecto_connection_test.exs b/test/ecto_connection_test.exs index 03ab6950..5e0a59f6 100644 --- a/test/ecto_connection_test.exs +++ b/test/ecto_connection_test.exs @@ -413,8 +413,8 @@ defmodule Ecto.Adapters.LibSql.ConnectionTest do error = %{message: "UNIQUE constraint failed: users.email"} constraints = Connection.to_constraints(error, []) - # Returns string constraint names to match Ecto changeset format - assert [unique: "email"] = constraints + # Reconstructs index name following Ecto's naming convention: 
table_column_index + assert [unique: "users_email_index"] = constraints end test "converts FOREIGN KEY constraint errors" do @@ -438,6 +438,38 @@ defmodule Ecto.Adapters.LibSql.ConnectionTest do assert [] = constraints end + + test "converts multi-column UNIQUE constraint errors" do + error = %{message: "UNIQUE constraint failed: users.slug, users.parent_slug"} + constraints = Connection.to_constraints(error, []) + + # Reconstructs index name from multiple columns: table_col1_col2_index + assert [unique: "users_slug_parent_slug_index"] = constraints + end + + test "converts NOT NULL constraint errors" do + error = %{message: "NOT NULL constraint failed: users.name"} + constraints = Connection.to_constraints(error, []) + + # NOT NULL constraints are reported as check constraints with reconstructed index name + assert [check: "users_name_index"] = constraints + end + + test "handles backticks in constraint error messages" do + error = %{message: "UNIQUE constraint failed: users.email`"} + constraints = Connection.to_constraints(error, []) + + # Properly strips backticks appended by libSQL + assert [unique: "users_email_index"] = constraints + end + + test "preserves enhanced error messages with index name" do + error = %{message: "UNIQUE constraint failed: users.email (index: custom_email_index)"} + constraints = Connection.to_constraints(error, []) + + # Uses the provided index name from enhanced error + assert [unique: "custom_email_index"] = constraints + end end describe "on_conflict insert" do diff --git a/test/ecto_integration_test.exs b/test/ecto_integration_test.exs index 123469c1..90e87979 100644 --- a/test/ecto_integration_test.exs +++ b/test/ecto_integration_test.exs @@ -30,7 +30,7 @@ defmodule Ecto.Integration.EctoLibSqlTest do |> cast(attrs, [:name, :email, :age, :active, :balance, :bio]) |> validate_required([:name, :email]) |> validate_format(:email, ~r/@/) - |> unique_constraint(:email, name: "email") + |> unique_constraint(:email, name: 
"users_email_index") end end From 9c0460bfef2976ec49ea11fce158e91794b588f5 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Wed, 17 Dec 2025 02:43:32 +0000 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=93=9D=20Add=20docstrings=20to=20`f?= =?UTF-8?q?ix-constraint-errors`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docstrings generation was requested by @ocean. * https://github.com/ocean/ecto_libsql/pull/35#issuecomment-3663385463 The following files were modified: * `lib/ecto/adapters/libsql/connection.ex` --- lib/ecto/adapters/libsql/connection.ex | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/ecto/adapters/libsql/connection.ex b/lib/ecto/adapters/libsql/connection.ex index c69a385d..52206eb3 100644 --- a/lib/ecto/adapters/libsql/connection.ex +++ b/lib/ecto/adapters/libsql/connection.ex @@ -66,6 +66,21 @@ defmodule Ecto.Adapters.LibSql.Connection do end @impl true + @doc """ + Parse a SQLite error message and map it to a list of Ecto constraint tuples. + + Accepts an exception-like map containing a SQLite error `:message` and returns recognised constraint information such as unique, foreign_key or check constraints; returns an empty list when no known constraint pattern is found. + + ## Parameters + + - error: Map containing a `:message` string produced by SQLite. + - _opts: Options (unused). + + ## Returns + + - A keyword list of constraint tuples, for example `[unique: "table_column_index"]`, `[foreign_key: :unknown]`, `[check: "constraint_name"]`, or `[]` when no constraint is recognised. 
+ """ + @spec to_constraints(%{message: String.t()}, Keyword.t()) :: Keyword.t() def to_constraints(%{message: message}, _opts) do cond do String.contains?(message, "UNIQUE constraint failed") -> @@ -1032,4 +1047,4 @@ defmodule Ecto.Adapters.LibSql.Connection do defp intersperse_reduce([], _separator, count, [], _reducer) do {[], count} end -end +end \ No newline at end of file From 132e10edf79ebddb812011bc428158b12c5c1ec5 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 13:56:02 +1100 Subject: [PATCH 03/12] Clean up documentation formatting in connection.ex Removed unnecessary blank lines in the documentation for the to_constraints function. --- lib/ecto/adapters/libsql/connection.ex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/ecto/adapters/libsql/connection.ex b/lib/ecto/adapters/libsql/connection.ex index 52206eb3..7d60029e 100644 --- a/lib/ecto/adapters/libsql/connection.ex +++ b/lib/ecto/adapters/libsql/connection.ex @@ -68,16 +68,16 @@ defmodule Ecto.Adapters.LibSql.Connection do @impl true @doc """ Parse a SQLite error message and map it to a list of Ecto constraint tuples. - + Accepts an exception-like map containing a SQLite error `:message` and returns recognised constraint information such as unique, foreign_key or check constraints; returns an empty list when no known constraint pattern is found. - + ## Parameters - + - error: Map containing a `:message` string produced by SQLite. - _opts: Options (unused). - + ## Returns - + - A keyword list of constraint tuples, for example `[unique: "table_column_index"]`, `[foreign_key: :unknown]`, `[check: "constraint_name"]`, or `[]` when no constraint is recognised. 
""" @spec to_constraints(%{message: String.t()}, Keyword.t()) :: Keyword.t() @@ -1047,4 +1047,4 @@ defmodule Ecto.Adapters.LibSql.Connection do defp intersperse_reduce([], _separator, count, [], _reducer) do {[], count} end -end \ No newline at end of file +end From f58b51b0c25d2cb4dd7a09f143ab521467f5cc27 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 15:25:11 +1100 Subject: [PATCH 04/12] fix: Improve multi-column constraint name generation logic --- lib/ecto/adapters/libsql/connection.ex | 28 +++++++++----------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/lib/ecto/adapters/libsql/connection.ex b/lib/ecto/adapters/libsql/connection.ex index 7d60029e..c4e2e7fe 100644 --- a/lib/ecto/adapters/libsql/connection.ex +++ b/lib/ecto/adapters/libsql/connection.ex @@ -142,24 +142,16 @@ defmodule Ecto.Adapters.LibSql.Connection do if String.contains?(constraint, ", ") do # Multi-column constraint: "table.col1, table.col2" -> "table_col1_col2_index" - constraint - |> String.split(", ") - |> Enum.with_index() - |> Enum.map(fn - {table_col, 0} -> - # First column includes table name - table_col |> clean.() |> String.replace(".", "_") - - {table_col, _} -> - # Subsequent columns: only take the column name - table_col - |> clean.() - |> String.split(".") - |> List.last() - |> clean.() - end) - |> Enum.concat(["index"]) - |> Enum.join("_") + [first | rest] = String.split(constraint, ", ") + + table_col = first |> clean.() |> String.replace(".", "_") + + cols = + Enum.map(rest, fn col -> + col |> clean.() |> String.split(".") |> List.last() |> clean.() + end) + + [table_col | cols] |> Enum.concat(["index"]) |> Enum.join("_") else if String.contains?(constraint, ".") do # Single column: "table.column" -> "table_column_index" From 9c1489bef9af17bc21ec802024207f050e16e846 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 18:07:45 +1100 Subject: [PATCH 05/12] chore: Attempt setup of Dialyzer --- .dialyzer_ignore.exs | 25 
+++++++++++++++++++++++++ .gitignore | 3 +++ mix.exs | 17 ++++++++++++++--- mix.lock | 2 ++ 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 .dialyzer_ignore.exs diff --git a/.dialyzer_ignore.exs b/.dialyzer_ignore.exs new file mode 100644 index 00000000..28ec18c3 --- /dev/null +++ b/.dialyzer_ignore.exs @@ -0,0 +1,25 @@ +{"lib/ecto/adapters/libsql.ex", "Function rollback/2 has no local return."} +{"lib/ecto/adapters/libsql.ex", "The pattern can never match the type + {:error, %EctoLibSql.Error{:__exception__ => true, :message => _, :sqlite => nil}, + %EctoLibSql.State{:conn_id => _, _ => _}} + | {:ok, %EctoLibSql.Query{:statement => _, _ => _}, + %EctoLibSql.Result{ + :columns => _, + :command => + :begin + | :commit + | :create + | :delete + | :insert + | :rollback + | :select + | :unknown + | :update, + :num_rows => _, + :rows => _ + }, %EctoLibSql.State{:conn_id => _, _ => _}} +."} +{"lib/ecto/adapters/libsql.ex", "Type mismatch for @callback dump_cmd."} +{"lib/ecto/adapters/libsql/connection.ex", "Spec type mismatch in argument to callback to_constraints."} +{"lib/ecto/adapters/libsql/connection.ex", "Type mismatch with behaviour callback to explain_query/4."} +{"lib/ecto/adapters/libsql/connection.ex", "List construction (cons) will produce an improper list, because its second argument is <<_::64>>."} diff --git a/.gitignore b/.gitignore index 6e3c3029..3c23209c 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,9 @@ ecto_libsql-*.tar /priv/native/*.dll /priv/native/*.dylib +# Erlang PLTs for Dialyzer +/priv/plts/ + # Test databases z_ecto_libsql_test-*.db z_ecto_libsql_test-*.db-* diff --git a/mix.exs b/mix.exs index 98b2e09d..7e22f855 100644 --- a/mix.exs +++ b/mix.exs @@ -16,7 +16,17 @@ defmodule EctoLibSql.MixProject do homepage_url: @source_url, package: package(), description: description(), - docs: docs() + docs: docs(), + dialyzer: [ + plt_core_path: "priv/plts", + app_tree: true, + plt_add_apps: [:mix, :ex_unit], + 
ignore_warnings: ".dialyzer_ignore.exs", + list_unused_filters: true + ], + aliases: [ + "check.dialyzer": "dialyzer" + ] ] end @@ -38,12 +48,13 @@ defmodule EctoLibSql.MixProject do defp deps do [ - {:rustler, "~> 0.37.1"}, {:db_connection, "~> 2.1"}, + {:dialyxir, "~> 1.4", only: [:dev], runtime: false}, {:ecto, "~> 3.11"}, {:ecto_sql, "~> 3.11"}, {:ex_doc, "~> 0.31", only: :dev, runtime: false}, - {:jason, "~> 1.4"} + {:jason, "~> 1.4"}, + {:rustler, "~> 0.37.1"} ] end diff --git a/mix.lock b/mix.lock index 9af05434..ac770351 100644 --- a/mix.lock +++ b/mix.lock @@ -1,9 +1,11 @@ %{ "db_connection": {:hex, :db_connection, "2.8.1", "9abdc1e68c34c6163f6fb96a96532272d13ad7ca45262156ae8b7ec6d9dc4bec", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a61a3d489b239d76f326e03b98794fb8e45168396c925ef25feb405ed09da8fd"}, "decimal": {:hex, :decimal, "2.3.0", "3ad6255aa77b4a3c4f818171b12d237500e63525c2fd056699967a3e7ea20f62", [:mix], [], "hexpm", "a4d66355cb29cb47c3cf30e71329e58361cfcb37c34235ef3bf1d7bf3773aeac"}, + "dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"}, "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, "ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"}, "ecto_sql": {:hex, :ecto_sql, 
"3.13.3", "81f7067dd1951081888529002dbc71f54e5e891b69c60195040ea44697e1104a", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "5751caea36c8f5dd0d1de6f37eceffea19d10bd53f20e5bbe31c45f2efc8944a"}, + "erlex": {:hex, :erlex, "0.2.8", "cd8116f20f3c0afe376d1e8d1f0ae2452337729f68be016ea544a72f767d9c12", [:mix], [], "hexpm", "9d66ff9fedf69e49dc3fd12831e12a8a37b76f8651dd21cd45fcf5561a8a7590"}, "ex_doc": {:hex, :ex_doc, "0.39.3", "519c6bc7e84a2918b737aec7ef48b96aa4698342927d080437f61395d361dcee", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "0590955cf7ad3b625780ee1c1ea627c28a78948c6c0a9b0322bd976a079996e1"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, 
From eda25de1a77ab6c735c60fef1528cea2871b7f30 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 18:08:06 +1100 Subject: [PATCH 06/12] fix: Fix nits in connection file --- lib/ecto/adapters/libsql/connection.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ecto/adapters/libsql/connection.ex b/lib/ecto/adapters/libsql/connection.ex index c4e2e7fe..9880da20 100644 --- a/lib/ecto/adapters/libsql/connection.ex +++ b/lib/ecto/adapters/libsql/connection.ex @@ -148,7 +148,7 @@ defmodule Ecto.Adapters.LibSql.Connection do cols = Enum.map(rest, fn col -> - col |> clean.() |> String.split(".") |> List.last() |> clean.() + col |> clean.() |> String.split(".") |> List.last() end) [table_col | cols] |> Enum.concat(["index"]) |> Enum.join("_") @@ -158,7 +158,6 @@ defmodule Ecto.Adapters.LibSql.Connection do constraint |> clean.() |> String.split(".") - |> Enum.map(clean) |> Enum.concat(["index"]) |> Enum.join("_") else From 1d83f435cd85a0fdd950bbe4668c477b567dc4a0 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 18:08:32 +1100 Subject: [PATCH 07/12] tests: Add basic geospatial tests for vector functionality --- test/vector_geospatial_test.exs | 348 ++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 test/vector_geospatial_test.exs diff --git a/test/vector_geospatial_test.exs b/test/vector_geospatial_test.exs new file mode 100644 index 00000000..40b3f73e --- /dev/null +++ b/test/vector_geospatial_test.exs @@ -0,0 +1,348 @@ +defmodule Ecto.Vector.GeospatialTest do + use ExUnit.Case, async: false + + # Define test modules for Ecto schemas and repo + defmodule TestRepo do + use Ecto.Repo, + otp_app: :ecto_libsql, + adapter: Ecto.Adapters.LibSql + end + + defmodule Location do + use Ecto.Schema + import Ecto.Changeset + + schema "locations" do + field(:name, :string) + field(:latitude, :float) + field(:longitude, :float) + field(:embedding, :string) + field(:city, :string) + 
field(:country, :string) + + timestamps() + end + + def changeset(location, attrs) do + location + |> cast(attrs, [:name, :latitude, :longitude, :embedding, :city, :country]) + |> validate_required([:name, :latitude, :longitude]) + end + end + + @test_db "z_ecto_libsql_test-vector_geospatial.db" + + setup_all do + # Start the test repo + {:ok, _} = TestRepo.start_link(database: @test_db) + + # Create table with vector column for 2D coordinate embeddings + # Using F32_BLOB(2) for latitude/longitude pairs + Ecto.Adapters.SQL.query!(TestRepo, """ + CREATE TABLE IF NOT EXISTS locations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + latitude REAL NOT NULL, + longitude REAL NOT NULL, + embedding F32_BLOB(2), + city TEXT, + country TEXT, + inserted_at DATETIME, + updated_at DATETIME + ) + """) + + on_exit(fn -> + File.rm(@test_db) + end) + + :ok + end + + setup do + # Clean tables before each test + Ecto.Adapters.SQL.query!(TestRepo, "DELETE FROM locations") + :ok + end + + describe "vector geospatial search" do + test "finds nearest locations by cosine distance" do + # Insert famous world cities with their coordinates normalized to [-1, 1] range + # Real coordinates: latitude [-90, 90], longitude [-180, 180] + # Normalized: divide by max (90 for lat, 180 for lon) to get [-1, 1] range + + cities = [ + # Sydney, Australia (-33.87, 151.21) + { + "Sydney", + -33.87, + 151.21, + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + "Sydney", + "Australia" + }, + # Melbourne, Australia (-37.81, 144.96) + { + "Melbourne", + -37.81, + 144.96, + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + "Melbourne", + "Australia" + }, + # Auckland, New Zealand (-37.01, 174.88) + { + "Auckland", + -37.01, + 174.88, + EctoLibSql.Native.vector([-37.01 / 90, 174.88 / 180]), + "Auckland", + "New Zealand" + }, + # Tokyo, Japan (35.68, 139.69) + { + "Tokyo", + 35.68, + 139.69, + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + "Tokyo", + "Japan" + }, + # New 
York, USA (40.71, -74.01) + { + "New York", + 40.71, + -74.01, + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]), + "New York", + "USA" + } + ] + + # Insert all cities + Enum.each(cities, fn {name, lat, lon, embedding, city, country} -> + TestRepo.insert!(%Location{ + name: name, + latitude: lat, + longitude: lon, + embedding: embedding, + city: city, + country: country + }) + end) + + # Search for locations nearest to Sydney + # Sydney normalized: [-33.87/90, 151.21/180] ≈ [-0.3764, 0.8400] + sydney_embedding = EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) + + # Query using cosine distance + result = + Ecto.Adapters.SQL.query!(TestRepo, """ + SELECT name, city, country, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + LIMIT 3 + """, [sydney_embedding]) + + # Should return Sydney first (distance 0 to itself), followed by other cities + assert result.num_rows == 3 + [[sydney_name, _, _, sydney_dist], [second_name, _, _, second_dist], [third_name, _, _, third_dist]] = + result.rows + + assert sydney_name == "Sydney" + + # Sydney should be closest to itself (distance very close to 0) + assert sydney_dist < 0.001 + + # Verify other results are farther than Sydney + assert second_dist > sydney_dist + assert third_dist > sydney_dist + + # All results should be valid city names + assert second_name in ["Melbourne", "Auckland", "Tokyo", "New York"] + assert third_name in ["Melbourne", "Auckland", "Tokyo", "New York"] + end + + test "filters nearest locations by region" do + # Insert cities from different regions + Ecto.Adapters.SQL.query!(TestRepo, """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', 
datetime('now'), datetime('now')), + ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ]) + + # Find nearest location to Tokyo, but only in Asia + tokyo_embedding = EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]) + + result = + Ecto.Adapters.SQL.query!(TestRepo, """ + SELECT name, city, country, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE country IN ('Japan', 'Australia') + ORDER BY distance + LIMIT 2 + """, [tokyo_embedding]) + + assert result.num_rows == 2 + [[first_name, _, first_country, first_dist], [second_name, _, _second_country, _second_dist]] = + result.rows + + # Tokyo should be first (distance 0) + assert first_name == "Tokyo" + assert first_country == "Japan" + assert first_dist < 0.001 + + # Osaka should be second (closest other Japan city to Tokyo) + assert second_name == "Osaka" + end + + test "searches within distance threshold" do + # Insert cities + Ecto.Adapters.SQL.query!(TestRepo, """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, [ + 
EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ]) + + # Search for locations within a certain distance of Sydney + # Using a threshold of 0.15 (roughly 15% of max distance in normalized space) + sydney_embedding = EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) + + result = + Ecto.Adapters.SQL.query!(TestRepo, """ + SELECT name, country, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE vector_distance_cos(embedding, vector(?)) < 0.15 + ORDER BY distance + """, [sydney_embedding, sydney_embedding]) + + # Should find Sydney and nearby Australian cities + names = Enum.map(result.rows, fn [name, _, _] -> name end) + + assert "Sydney" in names + assert "Melbourne" in names + assert "Brisbane" in names + # Tokyo and New York should be too far (distance > 0.15) + assert "Tokyo" not in names + assert "New York" not in names + end + + test "aggregates nearest neighbors by country" do + # Insert multiple cities per country + Ecto.Adapters.SQL.query!(TestRepo, """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')), + ('Los Angeles', 34.05, -118.24, vector(?), 'Los Angeles', 'USA', 
datetime('now'), datetime('now')) + """, [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]), + EctoLibSql.Native.vector([34.05 / 90, -118.24 / 180]) + ]) + + # Find the closest location to Sydney in each country + sydney_embedding = EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) + + result = + Ecto.Adapters.SQL.query!(TestRepo, """ + SELECT + country, + name, + vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE country != 'Australia' + ORDER BY country, distance + """, [sydney_embedding]) + + assert result.num_rows == 4 + rows = result.rows + + # Extract Japan results + japan_rows = Enum.filter(rows, fn [country, _, _] -> country == "Japan" end) + assert length(japan_rows) == 2 + [[japan_country, japan_city, japan_dist], [_, _, second_japan_dist]] = japan_rows + assert japan_country == "Japan" + assert japan_city == "Tokyo" + assert japan_dist < second_japan_dist + + # Extract USA results + usa_rows = Enum.filter(rows, fn [country, _, _] -> country == "USA" end) + assert length(usa_rows) == 2 + [[usa_country, _usa_city, usa_dist], [_, _, second_usa_dist]] = usa_rows + assert usa_country == "USA" + assert usa_dist < second_usa_dist + end + + test "finds approximate locations using vector ranges" do + # Insert locations in specific regions + Ecto.Adapters.SQL.query!(TestRepo, """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', 
datetime('now'), datetime('now')), + ('Bangkok', 13.73, 100.50, vector(?), 'Bangkok', 'Thailand', datetime('now'), datetime('now')), + ('Singapore', 1.35, 103.82, vector(?), 'Singapore', 'Singapore', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([13.73 / 90, 100.50 / 180]), + EctoLibSql.Native.vector([1.35 / 90, 103.82 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ]) + + # Find locations in Southeast Asia (roughly 0-30° N, 95-140° E) + # Normalized: latitude [0/90, 30/90] = [0, 0.33], longitude [95/180, 140/180] = [0.53, 0.78] + result = + Ecto.Adapters.SQL.query!(TestRepo, """ + SELECT name, latitude, longitude, city, country + FROM locations + WHERE city IN ( + SELECT city FROM locations + WHERE latitude > 0 AND latitude < 30 + AND longitude > 95 AND longitude < 140 + ) + ORDER BY name + """) + + names = Enum.map(result.rows, fn [name, _, _, _, _] -> name end) + + assert length(names) == 2 + assert "Bangkok" in names + assert "Singapore" in names + assert "Sydney" not in names + assert "New York" not in names + end + end +end From 8303add79ddcf0852088b3429a186dbcc787c2f8 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 19:01:44 +1100 Subject: [PATCH 08/12] chore: Fix formatting --- test/vector_geospatial_test.exs | 233 +++++++++++++++++++------------- 1 file changed, 137 insertions(+), 96 deletions(-) diff --git a/test/vector_geospatial_test.exs b/test/vector_geospatial_test.exs index 40b3f73e..950d6356 100644 --- a/test/vector_geospatial_test.exs +++ b/test/vector_geospatial_test.exs @@ -137,16 +137,25 @@ defmodule Ecto.Vector.GeospatialTest do # Query using cosine distance result = - Ecto.Adapters.SQL.query!(TestRepo, """ - SELECT name, 
city, country, vector_distance_cos(embedding, vector(?)) as distance - FROM locations - ORDER BY distance - LIMIT 3 - """, [sydney_embedding]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, city, country, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + LIMIT 3 + """, + [sydney_embedding] + ) # Should return Sydney first (distance 0 to itself), followed by other cities assert result.num_rows == 3 - [[sydney_name, _, _, sydney_dist], [second_name, _, _, second_dist], [third_name, _, _, third_dist]] = + + [ + [sydney_name, _, _, sydney_dist], + [second_name, _, _, second_dist], + [third_name, _, _, third_dist] + ] = result.rows assert sydney_name == "Sydney" @@ -165,36 +174,48 @@ defmodule Ecto.Vector.GeospatialTest do test "filters nearest locations by region" do # Insert cities from different regions - Ecto.Adapters.SQL.query!(TestRepo, """ - INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) - VALUES - ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), - ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), - ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), - ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), - ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) - """, [ - EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), - EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), - EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), - EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), - EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) - ]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 
'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ] + ) # Find nearest location to Tokyo, but only in Asia tokyo_embedding = EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]) result = - Ecto.Adapters.SQL.query!(TestRepo, """ - SELECT name, city, country, vector_distance_cos(embedding, vector(?)) as distance - FROM locations - WHERE country IN ('Japan', 'Australia') - ORDER BY distance - LIMIT 2 - """, [tokyo_embedding]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, city, country, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE country IN ('Japan', 'Australia') + ORDER BY distance + LIMIT 2 + """, + [tokyo_embedding] + ) assert result.num_rows == 2 - [[first_name, _, first_country, first_dist], [second_name, _, _second_country, _second_dist]] = + + [ + [first_name, _, first_country, first_dist], + [second_name, _, _second_country, _second_dist] + ] = result.rows # Tokyo should be first (distance 0) @@ -208,33 +229,41 @@ defmodule Ecto.Vector.GeospatialTest do test "searches within distance threshold" do # Insert cities - Ecto.Adapters.SQL.query!(TestRepo, """ - INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) - VALUES - ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), 
datetime('now')), - ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), - ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), - ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), - ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) - """, [ - EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), - EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), - EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), - EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), - EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) - ]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ] + ) # Search for locations within a certain distance of Sydney # Using a threshold of 0.15 (roughly 15% of max distance in normalized space) sydney_embedding = EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) result = - Ecto.Adapters.SQL.query!(TestRepo, """ - SELECT name, country, vector_distance_cos(embedding, vector(?)) as distance - FROM 
locations - WHERE vector_distance_cos(embedding, vector(?)) < 0.15 - ORDER BY distance - """, [sydney_embedding, sydney_embedding]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, country, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE vector_distance_cos(embedding, vector(?)) < 0.15 + ORDER BY distance + """, + [sydney_embedding, sydney_embedding] + ) # Should find Sydney and nearby Australian cities names = Enum.map(result.rows, fn [name, _, _] -> name end) @@ -249,39 +278,47 @@ defmodule Ecto.Vector.GeospatialTest do test "aggregates nearest neighbors by country" do # Insert multiple cities per country - Ecto.Adapters.SQL.query!(TestRepo, """ - INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) - VALUES - ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), - ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), - ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), - ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), - ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), - ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')), - ('Los Angeles', 34.05, -118.24, vector(?), 'Los Angeles', 'USA', datetime('now'), datetime('now')) - """, [ - EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), - EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), - EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), - EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), - EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), - EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]), - EctoLibSql.Native.vector([34.05 / 90, -118.24 / 180]) - ]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, 
embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')), + ('Los Angeles', 34.05, -118.24, vector(?), 'Los Angeles', 'USA', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]), + EctoLibSql.Native.vector([34.05 / 90, -118.24 / 180]) + ] + ) # Find the closest location to Sydney in each country sydney_embedding = EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) result = - Ecto.Adapters.SQL.query!(TestRepo, """ - SELECT - country, - name, - vector_distance_cos(embedding, vector(?)) as distance - FROM locations - WHERE country != 'Australia' - ORDER BY country, distance - """, [sydney_embedding]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT + country, + name, + vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE country != 'Australia' + ORDER BY country, distance + """, + [sydney_embedding] + ) assert result.num_rows == 4 rows = result.rows @@ -304,23 +341,27 @@ defmodule Ecto.Vector.GeospatialTest do test "finds approximate locations using vector ranges" do # Insert locations in specific regions - 
Ecto.Adapters.SQL.query!(TestRepo, """ - INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) - VALUES - ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), - ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), - ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), - ('Bangkok', 13.73, 100.50, vector(?), 'Bangkok', 'Thailand', datetime('now'), datetime('now')), - ('Singapore', 1.35, 103.82, vector(?), 'Singapore', 'Singapore', datetime('now'), datetime('now')), - ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) - """, [ - EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), - EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), - EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), - EctoLibSql.Native.vector([13.73 / 90, 100.50 / 180]), - EctoLibSql.Native.vector([1.35 / 90, 103.82 / 180]), - EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) - ]) + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('Bangkok', 13.73, 100.50, vector(?), 'Bangkok', 'Thailand', datetime('now'), datetime('now')), + ('Singapore', 1.35, 103.82, vector(?), 'Singapore', 'Singapore', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 
139.69 / 180]), + EctoLibSql.Native.vector([13.73 / 90, 100.50 / 180]), + EctoLibSql.Native.vector([1.35 / 90, 103.82 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ] + ) # Find locations in Southeast Asia (roughly 0-30° N, 95-140° E) # Normalized: latitude [0/90, 30/90] = [0, 0.33], longitude [95/180, 140/180] = [0.53, 0.78] From 663d921705f71591e45f710e8e4e3297d2f0a517 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 19:06:12 +1100 Subject: [PATCH 09/12] tests: Add some more complex geospatial tests --- test/vector_geospatial_test.exs | 499 ++++++++++++++++++++++++++++++++ 1 file changed, 499 insertions(+) diff --git a/test/vector_geospatial_test.exs b/test/vector_geospatial_test.exs index 950d6356..86b66465 100644 --- a/test/vector_geospatial_test.exs +++ b/test/vector_geospatial_test.exs @@ -54,6 +54,8 @@ defmodule Ecto.Vector.GeospatialTest do on_exit(fn -> File.rm(@test_db) + File.rm(@test_db <> "-wal") + File.rm(@test_db <> "-shm") end) :ok @@ -386,4 +388,501 @@ defmodule Ecto.Vector.GeospatialTest do assert "New York" not in names end end + + describe "vector edge cases" do + test "handles NULL embeddings gracefully" do + # Insert location with NULL embedding + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Unknown', 0.0, 0.0, NULL, 'Unknown', 'Unknown', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]) + ] + ) + + # Query should filter out NULL embeddings + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, city + FROM locations + WHERE embedding IS NOT NULL + ORDER BY name + """ + ) + + assert 
result.num_rows == 2 + names = Enum.map(result.rows, fn [name, _] -> name end) + assert "Sydney" in names + assert "Tokyo" in names + assert "Unknown" not in names + end + + test "returns empty result set when no locations match" do + # Insert only locations far from query point + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]) + ] + ) + + # Query with impossible distance threshold + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, city, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE vector_distance_cos(embedding, vector(?)) < 0.01 + ORDER BY distance + """, + [ + EctoLibSql.Native.vector([0.5, 0.5]), + EctoLibSql.Native.vector([0.5, 0.5]) + ] + ) + + assert result.num_rows == 0 + end + + test "handles zero distance (identical embeddings)" do + # Insert same location twice + embedding = EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Sydney Copy', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')) + """, + [embedding, embedding, EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180])] + ) + + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM 
locations + ORDER BY distance + """, + [embedding] + ) + + # First two should have distance close to 0 + [ + [_first_name, first_dist], + [_second_name, second_dist], + [_third_name, third_dist] + ] = result.rows + + # Both Sydney records should be at distance 0 + assert first_dist < 0.001 + assert second_dist < 0.001 + # Tokyo should be farther (but not necessarily > 0.5 given coordinate ranges) + assert third_dist > first_dist + end + + test "handles query with only NULL embeddings in table" do + # Insert location with NULL embedding + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Unknown1', 0.0, 0.0, NULL, 'Unknown', 'Unknown', datetime('now'), datetime('now')), + ('Unknown2', 0.0, 0.0, NULL, 'Unknown', 'Unknown', datetime('now'), datetime('now')) + """ + ) + + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, embedding + FROM locations + WHERE embedding IS NOT NULL + """ + ) + + assert result.num_rows == 0 + end + + test "handles distance calculation with extreme coordinate values" do + # Insert locations at extreme valid coordinates + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('North Pole', 90.0, 0.0, vector(?), 'North', 'Pole', datetime('now'), datetime('now')), + ('South Pole', -90.0, 0.0, vector(?), 'South', 'Pole', datetime('now'), datetime('now')), + ('Date Line East', 0.0, 180.0, vector(?), 'East', 'Line', datetime('now'), datetime('now')), + ('Date Line West', 0.0, -180.0, vector(?), 'West', 'Line', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([90.0 / 90, 0.0 / 180]), + EctoLibSql.Native.vector([-90.0 / 90, 0.0 / 180]), + EctoLibSql.Native.vector([0.0 / 90, 180.0 / 180]), + EctoLibSql.Native.vector([0.0 / 90, -180.0 / 180]) + ] + ) + + # Query should handle extreme values without 
error + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + """, + [EctoLibSql.Native.vector([1.0, 0.0])] + ) + + assert result.num_rows == 4 + # All distances should be valid numbers + Enum.each(result.rows, fn [_name, distance] -> + assert is_float(distance) + assert distance >= 0.0 + assert distance <= 2.0 + end) + end + + test "handles large embedding vectors" do + # Create larger embeddings (more realistic for AI models) + # Simulating 128-dimensional embeddings + large_embedding_1 = + EctoLibSql.Native.vector(Enum.map(1..128, fn i -> :math.sin(i / 10.0) end)) + + large_embedding_2 = + EctoLibSql.Native.vector(Enum.map(1..128, fn i -> :math.cos(i / 10.0) end)) + + # Insert with larger embeddings + Ecto.Adapters.SQL.query!( + TestRepo, + """ + CREATE TABLE IF NOT EXISTS locations_large ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT, + embedding F32_BLOB(128), + inserted_at DATETIME + ) + """ + ) + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations_large (name, embedding, inserted_at) + VALUES (?, vector(?), datetime('now')), (?, vector(?), datetime('now')) + """, + ["Vector1", large_embedding_1, "Vector2", large_embedding_2] + ) + + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations_large + ORDER BY distance + """, + [large_embedding_1] + ) + + assert result.num_rows == 2 + [[first_name, first_dist], [_second_name, second_dist]] = result.rows + assert first_name == "Vector1" + # Distance to itself should be very close to 0 + assert first_dist < 0.001 + # Distance to different vector should be larger + assert second_dist > first_dist + + # Cleanup + Ecto.Adapters.SQL.query!(TestRepo, "DROP TABLE locations_large") + end + + test "handles pagination with distance ordering" do + # Insert 10 locations + Ecto.Adapters.SQL.query!( + TestRepo, + 
""" + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Brisbane', -27.47, 153.03, vector(?), 'Brisbane', 'Australia', datetime('now'), datetime('now')), + ('Adelaide', -34.93, 138.60, vector(?), 'Adelaide', 'Australia', datetime('now'), datetime('now')), + ('Perth', -31.95, 115.86, vector(?), 'Perth', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('Osaka', 34.67, 135.50, vector(?), 'Osaka', 'Japan', datetime('now'), datetime('now')), + ('Kyoto', 35.01, 135.77, vector(?), 'Kyoto', 'Japan', datetime('now'), datetime('now')), + ('New York', 40.71, -74.01, vector(?), 'New York', 'USA', datetime('now'), datetime('now')), + ('Los Angeles', 34.05, -118.24, vector(?), 'Los Angeles', 'USA', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), + EctoLibSql.Native.vector([-34.93 / 90, 138.60 / 180]), + EctoLibSql.Native.vector([-31.95 / 90, 115.86 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([34.67 / 90, 135.50 / 180]), + EctoLibSql.Native.vector([35.01 / 90, 135.77 / 180]), + EctoLibSql.Native.vector([40.71 / 90, -74.01 / 180]), + EctoLibSql.Native.vector([34.05 / 90, -118.24 / 180]) + ] + ) + + # Get first page (3 results) + page_1 = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + LIMIT 3 + """, + [EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180])] + ) + + assert page_1.num_rows == 3 + page_1_names = 
Enum.map(page_1.rows, fn [name, _] -> name end) + + # Get second page (next 3 results) + page_2 = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + LIMIT 3 OFFSET 3 + """, + [EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180])] + ) + + assert page_2.num_rows == 3 + page_2_names = Enum.map(page_2.rows, fn [name, _] -> name end) + + # Pages should not overlap + assert page_1_names -- page_2_names == page_1_names + end + + test "handles mixed NULL and valid embeddings in distance query" do + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Unknown', 0.0, 0.0, NULL, 'Unknown', 'Unknown', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')), + ('Mystery', 0.0, 0.0, NULL, 'Mystery', 'Mystery', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]) + ] + ) + + # Should only process non-NULL embeddings + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE embedding IS NOT NULL + ORDER BY distance + """, + [EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180])] + ) + + assert result.num_rows == 3 + names = Enum.map(result.rows, fn [name, _] -> name end) + assert "Sydney" in names + assert "Tokyo" in names + assert "Melbourne" in names + assert "Unknown" not in names + assert "Mystery" not in names + end + end + + describe "vector error 
cases" do + test "handles mismatched vector dimensions gracefully" do + # This test documents behavior when attempting mismatched dimensions + # Create table with 2D vectors + embedding_2d = EctoLibSql.Native.vector([0.5, 0.5]) + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES (?, ?, ?, vector(?), ?, ?, datetime('now'), datetime('now')) + """, + ["Sydney", -33.87, 151.21, embedding_2d, "Sydney", "Australia"] + ) + + # Try to query with same dimensional embedding - should work + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + """, + [embedding_2d] + ) + + assert result.num_rows == 1 + assert result.num_rows > 0 + end + + test "handles very large distance thresholds" do + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]) + ] + ) + + # Query with very large threshold - should return all + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE vector_distance_cos(embedding, vector(?)) < 10.0 + ORDER BY distance + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]) + ] + ) + + assert result.num_rows == 2 + end + + test "handles zero distance threshold" do + embedding = EctoLibSql.Native.vector([0.5, 0.5]) + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, 
longitude, embedding, city, country, inserted_at, updated_at) + VALUES (?, ?, ?, vector(?), ?, ?, datetime('now'), datetime('now')) + """, + ["Sydney", -33.87, 151.21, embedding, "Sydney", "Australia"] + ) + + # Query with zero threshold - should only match exact duplicates + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE vector_distance_cos(embedding, vector(?)) < 0.0001 + ORDER BY distance + """, + [embedding, embedding] + ) + + # Should return the exact match + assert result.num_rows == 1 + end + + test "handles negative distance comparisons gracefully" do + embedding = EctoLibSql.Native.vector([-0.5, 0.5]) + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES (?, ?, ?, vector(?), ?, ?, datetime('now'), datetime('now')) + """, + ["Sydney", -33.87, 151.21, embedding, "Sydney", "Australia"] + ) + + # Query with negative threshold - should return no results + # (distances are always >= 0) + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + WHERE vector_distance_cos(embedding, vector(?)) < -0.1 + ORDER BY distance + """, + [embedding, embedding] + ) + + assert result.num_rows == 0 + end + + test "handles duplicate removals after distance sorting" do + embedding = EctoLibSql.Native.vector([0.5, 0.5]) + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('A', 0.0, 0.0, vector(?), 'City A', 'Country A', datetime('now'), datetime('now')), + ('A', 0.0, 0.0, vector(?), 'City A', 'Country A', datetime('now'), datetime('now')), + ('B', 1.0, 1.0, vector(?), 'City B', 'Country B', datetime('now'), datetime('now')) + """, + [embedding, embedding, 
EctoLibSql.Native.vector([0.6, 0.6])] + ) + + # Query should return both duplicate records (SQL doesn't auto-deduplicate) + result = + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + LIMIT 5 + """, + [embedding] + ) + + assert result.num_rows == 3 + end + end end From 4ab1f102ae00d398d5c45189143e6bf09045a6e0 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 19:14:19 +1100 Subject: [PATCH 10/12] tests: Add vector transaction tests --- test/vector_geospatial_test.exs | 430 +++++++++++++++++++++++++++++++- 1 file changed, 423 insertions(+), 7 deletions(-) diff --git a/test/vector_geospatial_test.exs b/test/vector_geospatial_test.exs index 86b66465..50026593 100644 --- a/test/vector_geospatial_test.exs +++ b/test/vector_geospatial_test.exs @@ -723,12 +723,28 @@ defmodule Ecto.Vector.GeospatialTest do ) assert result.num_rows == 3 - names = Enum.map(result.rows, fn [name, _] -> name end) - assert "Sydney" in names - assert "Tokyo" in names - assert "Melbourne" in names - assert "Unknown" not in names - assert "Mystery" not in names + + [ + [first_name, first_dist], + [second_name, second_dist], + [third_name, third_dist] + ] = result.rows + + # Sydney should be first (distance ~0 to itself) + assert first_name == "Sydney" + assert first_dist < 0.001 + + # Melbourne should be second (same country, closer) + assert second_name == "Melbourne" + assert second_dist > first_dist + + # Tokyo should be third (different country, farther) + assert third_name == "Tokyo" + assert third_dist > second_dist + + # NULL embeddings should be filtered out + assert "Unknown" not in [first_name, second_name, third_name] + assert "Mystery" not in [first_name, second_name, third_name] end end @@ -759,7 +775,6 @@ defmodule Ecto.Vector.GeospatialTest do ) assert result.num_rows == 1 - assert result.num_rows > 0 end test "handles very large distance thresholds" do @@ -885,4 
+900,405 @@ defmodule Ecto.Vector.GeospatialTest do assert result.num_rows == 3 end end + + describe "vector transaction rollback" do + test "rolls back vector insertions on transaction failure" do + # Start with empty table + result_before = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result_before.rows == [[0]] + + # Attempt transaction that rolls back + {:error, :test_rollback} = + TestRepo.transaction(fn -> + # Insert location with vector embedding + TestRepo.insert!(%Location{ + name: "Test Location", + latitude: 45.0, + longitude: 45.0, + embedding: EctoLibSql.Native.vector([45.0 / 90, 45.0 / 180]), + city: "Test City", + country: "Test Country" + }) + + # Verify insert succeeded within transaction + count_result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert count_result.rows == [[1]] + + # Rollback the transaction + TestRepo.rollback(:test_rollback) + end) + + # Verify rollback succeeded and location was not persisted + result_after = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result_after.rows == [[0]] + end + + test "rolls back multiple vector insertions on transaction failure" do + # Insert initial data + TestRepo.insert!(%Location{ + name: "Sydney", + latitude: -33.87, + longitude: 151.21, + embedding: EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + city: "Sydney", + country: "Australia" + }) + + # Verify initial count + result_before = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result_before.rows == [[1]] + + # Attempt transaction with multiple inserts that rolls back + {:error, :batch_insert_failed} = + TestRepo.transaction(fn -> + locations = [ + %Location{ + name: "Melbourne", + latitude: -37.81, + longitude: 144.96, + embedding: EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + city: "Melbourne", + country: "Australia" + }, + %Location{ + name: "Brisbane", + latitude: -27.47, + 
longitude: 153.03, + embedding: EctoLibSql.Native.vector([-27.47 / 90, 153.03 / 180]), + city: "Brisbane", + country: "Australia" + } + ] + + # Insert multiple locations + Enum.each(locations, fn loc -> + TestRepo.insert!(loc) + end) + + # Verify inserts succeeded within transaction + count_result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert count_result.rows == [[3]] + + # Rollback everything + TestRepo.rollback(:batch_insert_failed) + end) + + # Verify only initial location remains after rollback + result_after = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result_after.rows == [[1]] + + # Verify it's the original Sydney location + location_result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT name FROM locations ORDER BY name") + + assert location_result.rows == [["Sydney"]] + end + + test "handles savepoint rollback with vector operations" do + # Start fresh + Ecto.Adapters.SQL.query!(TestRepo, "DELETE FROM locations") + + # Insert initial location + TestRepo.insert!(%Location{ + name: "Sydney", + latitude: -33.87, + longitude: 151.21, + embedding: EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + city: "Sydney", + country: "Australia" + }) + + # Verify initial count + result_before = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result_before.rows == [[1]] + + # Transaction with savepoint rollback + {:error, :manual_savepoint_rollback} = + TestRepo.transaction(fn -> + # Insert Melbourne + TestRepo.insert!(%Location{ + name: "Melbourne", + latitude: -37.81, + longitude: 144.96, + embedding: EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + city: "Melbourne", + country: "Australia" + }) + + # Verify Melbourne was inserted + count_mid = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert count_mid.rows == [[2]] + + # Manual rollback within transaction + TestRepo.rollback(:manual_savepoint_rollback) + end) + + 
# The rollback should cause the entire transaction to fail + # Verify Sydney is still there but Melbourne is not + result_after = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result_after.rows == [[1]] + + # Verify it's still Sydney + location_result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT name FROM locations") + + assert location_result.rows == [["Sydney"]] + end + end + + describe "vector concurrent operations" do + test "handles concurrent vector distance queries safely" do + # Insert test data + TestRepo.insert!(%Location{ + name: "Sydney", + latitude: -33.87, + longitude: 151.21, + embedding: EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + city: "Sydney", + country: "Australia" + }) + + TestRepo.insert!(%Location{ + name: "Melbourne", + latitude: -37.81, + longitude: 144.96, + embedding: EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + city: "Melbourne", + country: "Australia" + }) + + TestRepo.insert!(%Location{ + name: "Tokyo", + latitude: 35.68, + longitude: 139.69, + embedding: EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]), + city: "Tokyo", + country: "Japan" + }) + + # Run concurrent queries + tasks = + Enum.map(1..10, fn i -> + Task.async(fn -> + # Vary the query vector across iterations + query_lat = -33.87 + i * 0.1 + query_lon = 151.21 + i * 0.2 + + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + LIMIT 2 + """, + [ + EctoLibSql.Native.vector([ + query_lat / 90, + query_lon / 180 + ]) + ] + ) + end) + end) + + results = Task.await_many(tasks) + + # All concurrent queries should succeed + assert length(results) == 10 + assert Enum.all?(results, fn result -> result.num_rows == 2 end) + + # Each result should have valid distance values + Enum.each(results, fn result -> + Enum.each(result.rows, fn [_name, distance] -> + assert is_float(distance) + assert distance >= 0.0 + end) + end) + 
end + + test "handles concurrent vector insertions with transactions" do + # Prepare test data + locations = [ + {"Tokyo", 35.68, 139.69, "Tokyo", "Japan"}, + {"Osaka", 34.67, 135.50, "Osaka", "Japan"}, + {"Kyoto", 35.01, 135.77, "Kyoto", "Japan"}, + {"Bangkok", 13.73, 100.50, "Bangkok", "Thailand"}, + {"Singapore", 1.35, 103.82, "Singapore", "Singapore"} + ] + + # Run concurrent insert transactions + tasks = + Enum.map(locations, fn {name, lat, lon, city, country} -> + Task.async(fn -> + TestRepo.transaction(fn -> + TestRepo.insert!(%Location{ + name: name, + latitude: lat, + longitude: lon, + embedding: EctoLibSql.Native.vector([lat / 90, lon / 180]), + city: city, + country: country + }) + end) + end) + end) + + results = Task.await_many(tasks) + + # All inserts should succeed + assert Enum.all?(results, fn + {:ok, _} -> true + _ -> false + end) + + # Verify all locations were inserted + result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert result.rows == [[5]] + + # Verify data integrity + names_result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT name FROM locations ORDER BY name") + + names = Enum.map(names_result.rows, fn [name] -> name end) + + assert length(names) == 5 + + assert Enum.all?(names, fn name -> + name in ["Tokyo", "Osaka", "Kyoto", "Bangkok", "Singapore"] + end) + end + + test "handles concurrent distance queries with different vector dimensions" do + # Insert locations + Ecto.Adapters.SQL.query!( + TestRepo, + """ + INSERT INTO locations (name, latitude, longitude, embedding, city, country, inserted_at, updated_at) + VALUES + ('Sydney', -33.87, 151.21, vector(?), 'Sydney', 'Australia', datetime('now'), datetime('now')), + ('Melbourne', -37.81, 144.96, vector(?), 'Melbourne', 'Australia', datetime('now'), datetime('now')), + ('Tokyo', 35.68, 139.69, vector(?), 'Tokyo', 'Japan', datetime('now'), datetime('now')) + """, + [ + EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + 
EctoLibSql.Native.vector([-37.81 / 90, 144.96 / 180]), + EctoLibSql.Native.vector([35.68 / 90, 139.69 / 180]) + ] + ) + + # Run queries with different query points concurrently + query_points = [ + # Sydney + [-33.87 / 90, 151.21 / 180], + # Melbourne + [-37.81 / 90, 144.96 / 180], + # Tokyo + [35.68 / 90, 139.69 / 180], + # Origin + [0.0, 0.0], + # North Pole projection + [1.0, 0.0] + ] + + tasks = + Enum.map(query_points, fn point -> + Task.async(fn -> + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT name, vector_distance_cos(embedding, vector(?)) as distance + FROM locations + ORDER BY distance + """, + [EctoLibSql.Native.vector(point)] + ) + end) + end) + + results = Task.await_many(tasks) + + # All 5 queries should succeed and each should return all 3 locations + assert length(results) == 5 + assert Enum.all?(results, fn result -> result.num_rows == 3 end) + end + + test "handles concurrent read and transaction write operations" do + # Insert initial data + TestRepo.insert!(%Location{ + name: "Sydney", + latitude: -33.87, + longitude: 151.21, + embedding: EctoLibSql.Native.vector([-33.87 / 90, 151.21 / 180]), + city: "Sydney", + country: "Australia" + }) + + # Create reader and writer tasks + readers = + Enum.map(1..5, fn _ -> + Task.async(fn -> + Ecto.Adapters.SQL.query!( + TestRepo, + """ + SELECT COUNT(*) FROM locations + """ + ) + end) + end) + + writers = + Enum.map( + ["Melbourne", "Brisbane", "Adelaide"], + fn name -> + Task.async(fn -> + TestRepo.transaction(fn -> + TestRepo.insert!(%Location{ + name: name, + latitude: 0.0, + longitude: 0.0, + embedding: EctoLibSql.Native.vector([0.0, 0.0]), + city: name, + country: "Australia" + }) + end) + end) + end + ) + + # Wait for all tasks + read_results = Task.await_many(readers) + write_results = Task.await_many(writers) + + # All operations should succeed + assert Enum.all?(read_results, fn result -> result.num_rows > 0 end) + assert Enum.all?(write_results, fn result -> match?({:ok, _}, result) end) + + # Final
count should be initial + inserted + final_result = + Ecto.Adapters.SQL.query!(TestRepo, "SELECT COUNT(*) FROM locations") + + assert final_result.rows == [[4]] + end + end end From 2e9049d2ae527aa9f73655c886a1a68d4e15c0b4 Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 21:58:39 +1100 Subject: [PATCH 11/12] tests: Add comment about non-obvious type --- test/vector_geospatial_test.exs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/vector_geospatial_test.exs b/test/vector_geospatial_test.exs index 50026593..0524b8e1 100644 --- a/test/vector_geospatial_test.exs +++ b/test/vector_geospatial_test.exs @@ -16,6 +16,7 @@ defmodule Ecto.Vector.GeospatialTest do field(:name, :string) field(:latitude, :float) field(:longitude, :float) + # Binary vector data stored as F32_BLOB field(:embedding, :string) field(:city, :string) field(:country, :string) From 27e1bec160877c078b82461fd83b7c0d76539aac Mon Sep 17 00:00:00 2001 From: Drew Robinson Date: Wed, 17 Dec 2025 22:57:13 +1100 Subject: [PATCH 12/12] tests: Improve vector test name --- test/vector_geospatial_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/vector_geospatial_test.exs b/test/vector_geospatial_test.exs index 0524b8e1..00b9efa6 100644 --- a/test/vector_geospatial_test.exs +++ b/test/vector_geospatial_test.exs @@ -750,8 +750,8 @@ defmodule Ecto.Vector.GeospatialTest do end describe "vector error cases" do - test "handles mismatched vector dimensions gracefully" do - # This test documents behavior when attempting mismatched dimensions + test "queries vectors with matching dimensions" do + # Test successful query with matching 2D vectors # Create table with 2D vectors embedding_2d = EctoLibSql.Native.vector([0.5, 0.5])