Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyobvector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
* st_astext GIS function: return a Point in human-readable format
* FtsParser Text Parser Type for Full Text Search
* FtsIndexParam Full Text Search index parameter
* make_analyzer_properties Build parser_properties string for built-in analyzer parser
* MatchAgainst Full Text Search clause
"""

Expand Down Expand Up @@ -104,5 +105,6 @@
"OceanBase",
"FtsParser",
"FtsIndexParam",
"make_analyzer_properties",
"MatchAgainst",
]
4 changes: 3 additions & 1 deletion pyobvector/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
* ObSubKeyPartition Specify Key subpartition info
* FtsParser Text Parser Type for Full Text Search
* FtsIndexParam Full Text Search index parameter
* make_analyzer_properties Build parser_properties string for built-in analyzer parser
"""

import os
Expand All @@ -44,7 +45,7 @@
from .schema_type import DataType
from .collection_schema import FieldSchema, CollectionSchema
from .partitions import *
from .fts_index_param import FtsParser, FtsIndexParam
from .fts_index_param import FtsParser, FtsIndexParam, make_analyzer_properties


def _resolve_password(password: str) -> str:
Expand Down Expand Up @@ -114,4 +115,5 @@ def SeekdbRemoteClient(
"ObSubKeyPartition",
"FtsParser",
"FtsIndexParam",
"make_analyzer_properties",
]
41 changes: 39 additions & 2 deletions pyobvector/client/fts_index_param.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""A module to specify fts index parameters"""

import json
from enum import Enum


Expand All @@ -11,6 +12,7 @@ class FtsParser(Enum):
NGRAM2 = 2 # NGRAM2 parser (supported from V4.3.5 BP2+)
BASIC_ENGLISH = 3 # Basic English parser
JIEBA = 4 # jieba parser
ANALYZER = 5 # analyzer parser with PARSER_PROPERTIES support


class FtsIndexParam:
Expand All @@ -19,19 +21,22 @@ class FtsIndexParam:
Args:
index_name: Index name
field_names: List of field names to create full-text index on
parser_type: Parser type, can be FtsParser enum or string (for custom parsers)
If None, uses default Space parser
parser_properties: Content placed inside PARSER_PROPERTIES = (...) in the DDL.
When set and parser_type is not None, the clause is appended.
Required for FtsParser.ANALYZER; optional for others.
"""

def __init__(
    self,
    index_name: str,
    field_names: list[str],
    parser_type: FtsParser | str | None = None,
    parser_properties: str | None = None,
):
    """Record the index definition exactly as supplied.

    Nothing is validated or normalised at construction time; each argument
    is kept unchanged on the instance under the attribute of the same name.
    """
    self.index_name, self.field_names = index_name, field_names
    self.parser_type, self.parser_properties = parser_type, parser_properties

def param_str(self) -> str | None:
"""Convert parser type to string format for SQL."""
Expand All @@ -40,6 +45,15 @@ def param_str(self) -> str | None:

if isinstance(self.parser_type, str):
# Custom parser name (e.g., "thai_ftparser")
if (
self.parser_type.lower() == "analyzer"
and self.parser_properties is None
):
raise ValueError(
'parser_type "analyzer" requires parser_properties '
"(OceanBase rejects WITH PARSER analyzer without PARSER_PROPERTIES). "
'Example value: analysis = \'{"analyzer": "standard"}\''
)
return self.parser_type.lower()

if isinstance(self.parser_type, FtsParser):
Expand All @@ -53,6 +67,14 @@ def param_str(self) -> str | None:
return "beng"
if self.parser_type == FtsParser.JIEBA:
return "jieba"
if self.parser_type == FtsParser.ANALYZER:
if self.parser_properties is None:
raise ValueError(
"FtsParser.ANALYZER requires parser_properties "
"(OceanBase rejects WITH PARSER analyzer without PARSER_PROPERTIES). "
'Example value: analysis = \'{"analyzer": "standard"}\''
)
return "analyzer"
Comment thread
whhe marked this conversation as resolved.
# Raise exception for unrecognized FtsParser enum values
raise ValueError(f"Unrecognized FtsParser enum value: {self.parser_type}")

Expand All @@ -63,6 +85,8 @@ def __iter__(self):
yield "field_names", self.field_names
if self.parser_type:
yield "parser_type", self.parser_type
if self.parser_properties is not None:
yield "parser_properties", self.parser_properties

def __str__(self):
return str(dict(self))
Expand All @@ -74,3 +98,16 @@ def __eq__(self, other: object) -> bool:
if isinstance(other, dict):
return dict(self) == other
return False


def make_analyzer_properties(analyzer_type: str = "standard") -> str:
    """Build the parser_properties string for a built-in analyzer parser.

    The analyzer name is serialised as a small JSON object and wrapped in a
    single-quoted SQL string literal. Backslashes and single quotes in the
    JSON text are escaped so the analyzer name cannot terminate or alter the
    literal. Names made only of ordinary identifier characters produce
    exactly the same output as the unescaped form.

    Args:
        analyzer_type: Analyzer name. Defaults to "standard".

    Returns:
        A string suitable for FtsIndexParam(parser_properties=...), e.g.
        ``analysis = '{"analyzer": "standard"}'``
    """
    payload = json.dumps({"analyzer": analyzer_type})
    # json.dumps emits backslash escapes for quotes, backslashes and non-ASCII
    # characters. Doubling each backslash keeps the JSON intact once the SQL
    # lexer has processed the literal.
    # NOTE(review): assumes backslash is an escape character inside string
    # literals (MySQL-mode default, i.e. NO_BACKSLASH_ESCAPES is off) - confirm.
    literal = payload.replace("\\", "\\\\")
    # A bare single quote would close the SQL literal early; double it.
    literal = literal.replace("'", "''")
    return f"analysis = '{literal}'"
11 changes: 7 additions & 4 deletions pyobvector/client/ob_vec_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,16 @@ def create_table_with_index_params(
vidx.create(self.engine, checkfirst=True)
# create fts indexes
if fts_idxs is not None:
for fts_idx in fts_idxs:
for fts_idx_param in fts_idxs:
idx_cols = [
table.c[field_name] for field_name in fts_idx.field_names
table.c[field_name]
for field_name in fts_idx_param.field_names
]
fts_idx = FtsIndex(
fts_idx.index_name,
fts_idx.param_str(),
fts_idx_param.index_name,
fts_idx_param.param_str(),
*idx_cols,
parser_properties=fts_idx_param.parser_properties,
)
fts_idx.create(self.engine, checkfirst=True)

Expand Down Expand Up @@ -227,6 +229,7 @@ def create_fts_idx_with_fts_index_param(
fts_idx_param.index_name,
fts_idx_param.param_str(),
*idx_cols,
parser_properties=fts_idx_param.parser_properties,
)
fts_idx.create(self.engine, checkfirst=True)

Expand Down
37 changes: 32 additions & 5 deletions pyobvector/schema/full_text_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,36 @@ def visit_fts_index(self, index, create_ok=False):


class FtsIndex(Index):
"""Fts Index schema."""
"""Fts Index schema.

Args:
name: Index name.
fts_parser: Parser name (e.g. "ngram", "ik", "analyzer").
column_names: Columns to index.
parser_properties: Content placed inside PARSER_PROPERTIES = (...) in the DDL.
Required when fts_parser is "analyzer"; raises ValueError if omitted.
"""
Comment thread
whhe marked this conversation as resolved.

__visit_name__ = "fts_index"

def __init__(self, name, fts_parser: str, *column_names, **kw):
def __init__(
self,
name,
fts_parser: str | None,
*column_names,
parser_properties: str | None = None,
**kw,
):
if isinstance(fts_parser, str):
fts_parser = fts_parser.lower()
if fts_parser == "analyzer" and parser_properties is None:
raise ValueError(
'FtsIndex with fts_parser="analyzer" requires parser_properties '
"(OceanBase rejects WITH PARSER analyzer without PARSER_PROPERTIES). "
'Example value: analysis = \'{"analyzer": "standard"}\''
)
self.fts_parser = fts_parser
self.parser_properties = parser_properties
super().__init__(name, *column_names, **kw)
Comment thread
whhe marked this conversation as resolved.

def create(self, bind, checkfirst: bool = False) -> None:
Expand All @@ -59,6 +83,9 @@ def compile_create_fts_index(element, compiler, **kw): # pylint: disable=unused
table_name = index.table.name
column_list = ", ".join([column.name for column in index.columns])
fts_parser = index.fts_parser
if fts_parser is not None:
return f"CREATE FULLTEXT INDEX {index.name} ON {table_name} ({column_list}) WITH PARSER {fts_parser}"
return f"CREATE FULLTEXT INDEX {index.name} ON {table_name} ({column_list})"
if fts_parser is None:
return f"CREATE FULLTEXT INDEX {index.name} ON {table_name} ({column_list})"
sql = f"CREATE FULLTEXT INDEX {index.name} ON {table_name} ({column_list}) WITH PARSER {fts_parser}"
if index.parser_properties is not None:
sql += f" PARSER_PROPERTIES=({index.parser_properties})"
return sql
Comment thread
whhe marked this conversation as resolved.
142 changes: 140 additions & 2 deletions tests/test_fts_index.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
import unittest
from pyobvector import *
from sqlalchemy import Column, Integer, text
from pyobvector import (
ObVecClient,
FtsIndexParam,
FtsParser,
MatchAgainst,
make_analyzer_properties,
)
from pyobvector.schema.full_text_index import (
FtsIndex,
CreateFtsIndex,
compile_create_fts_index,
)
from sqlalchemy import Column, Integer, MetaData, Table, text
from sqlalchemy.dialects.mysql import TEXT
import logging

logger = logging.getLogger(__name__)


class _MockCompiler:
    """Minimal stub passed to compile_create_fts_index (unused by the function).

    Defines no attributes or methods. The tests in this module create an
    instance only to have an object to hand over as the ``compiler``
    positional argument.
    """


class ObFtsIndexTest(unittest.TestCase):
def setUp(self) -> None:
self.client = ObVecClient()
Expand Down Expand Up @@ -498,3 +513,126 @@ def test_fts_create_after_insert(self):
)

self.client.drop_table_if_exist(test_collection_name)


class FtsAnalyzerCompilationTest(unittest.TestCase):
    """Unit tests for ANALYZER parser SQL compilation — no DB connection required.

    The SQL assertions expect the exact clause text produced by
    compile_create_fts_index, i.e. ``PARSER_PROPERTIES=(...)`` with no spaces
    around the equals sign.
    """

    # parser_properties value shared by the analyzer tests.
    STANDARD_PROPS = 'analysis = \'{"analyzer": "standard"}\''

    @staticmethod
    def _make_table() -> Table:
        """Return a fresh two-column ``articles`` table bound to new metadata."""
        return Table(
            "articles",
            MetaData(),
            Column("id", Integer, primary_key=True),
            Column("body", TEXT),
        )

    def _build_index(self, parser_properties: str) -> FtsIndex:
        """Return an analyzer FtsIndex on ``articles.body`` with the given properties."""
        return FtsIndex(
            "ft_idx_body",
            "analyzer",
            self._make_table().c["body"],
            parser_properties=parser_properties,
        )

    @staticmethod
    def _compile(idx: FtsIndex) -> str:
        """Compile the CREATE FULLTEXT INDEX statement for ``idx``."""
        return compile_create_fts_index(CreateFtsIndex(idx), _MockCompiler())

    def test_fts_analyzer_param_str(self):
        param = FtsIndexParam(
            index_name="ft_idx_body",
            field_names=["body"],
            parser_type=FtsParser.ANALYZER,
            parser_properties=self.STANDARD_PROPS,
        )
        self.assertEqual(param.param_str(), "analyzer")

    def test_fts_analyzer_with_parser_properties(self):
        props = self.STANDARD_PROPS
        param = FtsIndexParam(
            index_name="ft_idx_body",
            field_names=["body"],
            parser_type=FtsParser.ANALYZER,
            parser_properties=props,
        )
        self.assertEqual(param.parser_properties, props)
        param_dict = dict(param)
        self.assertEqual(param_dict["parser_properties"], props)

    def test_fts_analyzer_sql_with_properties(self):
        props = self.STANDARD_PROPS
        sql = self._compile(self._build_index(props))
        self.assertIn("WITH PARSER analyzer", sql)
        # Must match the clause exactly as the compiler emits it.
        self.assertIn(f"PARSER_PROPERTIES=({props})", sql)

    def test_fts_analyzer_requires_parser_properties(self):
        param = FtsIndexParam(
            index_name="ft_idx_body",
            field_names=["body"],
            parser_type=FtsParser.ANALYZER,
        )
        with self.assertRaises(ValueError):
            param.param_str()

    def test_fts_parser_properties_not_tied_to_analyzer_type(self):
        # parser_properties can be used with any parser type, not only ANALYZER
        idx = FtsIndex(
            "ft_idx_body",
            "ngram",
            self._make_table().c["body"],
            parser_properties="token_size = 2",
        )
        sql = self._compile(idx)
        self.assertIn("WITH PARSER ngram", sql)
        self.assertIn("PARSER_PROPERTIES=(token_size = 2)", sql)

    def test_make_analyzer_properties_default(self):
        result = make_analyzer_properties()
        self.assertEqual(result, 'analysis = \'{"analyzer": "standard"}\'')

    def test_make_analyzer_properties_custom(self):
        result = make_analyzer_properties("ik_smart")
        self.assertEqual(result, 'analysis = \'{"analyzer": "ik_smart"}\'')

    def test_make_analyzer_properties_integrates_with_fts_index_param(self):
        param = FtsIndexParam(
            index_name="ft_idx_body",
            field_names=["body"],
            parser_type=FtsParser.ANALYZER,
            parser_properties=make_analyzer_properties(),
        )
        self.assertEqual(param.param_str(), "analyzer")
        self.assertIn('"analyzer": "standard"', param.parser_properties)

    def test_fts_index_direct_analyzer_requires_parser_properties(self):
        table = self._make_table()
        with self.assertRaises(ValueError):
            FtsIndex("ft_idx_body", "analyzer", table.c["body"])

    def test_fts_index_param_str_string_analyzer_requires_parser_properties(self):
        param = FtsIndexParam(
            index_name="ft_idx_body",
            field_names=["body"],
            parser_type="analyzer",
        )
        with self.assertRaises(ValueError):
            param.param_str()

    def test_fts_index_param_str_string_analyzer_with_parser_properties(self):
        param = FtsIndexParam(
            index_name="ft_idx_body",
            field_names=["body"],
            parser_type="analyzer",
            parser_properties=self.STANDARD_PROPS,
        )
        self.assertEqual(param.param_str(), "analyzer")
Loading