We design a self‑tuning database engine that adapts its execution strategies based on runtime feedback, mimicking bacterial riboswitches (which change conformation in response to metabolites). The engine monitors query patterns and, when a pattern reaches a “quorum” (frequency threshold), it triggers a tuning action: building an index, rewriting a query, or materializing a subquery.
The colony provides:
- Riboswitch: a condition (frequency > threshold) that flips the execution plan.
- Quorum sensing: collective decision to trigger tuning when many similar queries are observed.
- Biofilm memory: caches the tuned plan for future queries.
import random
import time
from collections import defaultdict
from dataclasses import dataclass
from typing import List, Dict
@dataclass
class Query:
sql: str
tables: List[str]
selectivity: float # estimated fraction of rows returned
execution_time: float # in seconds (simulated)
class SelfTuningEngine:
def __init__(self, quorum_threshold=5, tuning_interval=10):
self.quorum_threshold = quorum_threshold
self.tuning_interval = tuning_interval # seconds between tuning checks
self.query_counts = defaultdict(int) # frequency of SQL patterns
self.indexes = {} # table -> list of indexed columns
self.materialized_views = {} # view name -> query definition
self.last_tuning_time = time.time()
self.total_queries = 0
def normalize_sql(self, sql: str) -> str:
"""Remove constants to get a pattern (e.g., 'SELECT * FROM t WHERE x = ?')"""
import re
# Replace numeric/string literals with '?'
pattern = re.sub(r"\b\d+\b", "?", sql)
pattern = re.sub(r"'[^']*'", "?", pattern)
return pattern
def update_statistics(self, query: Query):
pattern = self.normalize_sql(query.sql)
self.query_counts[pattern] += 1
self.total_queries += 1
def should_tune(self) -> bool:
now = time.time()
if now - self.last_tuning_time >= self.tuning_interval:
self.last_tuning_time = now
return True
return False
def riboswitch_decision(self, pattern: str) -> bool:
# If pattern frequency exceeds quorum threshold, trigger tuning
return self.query_counts[pattern] >= self.quorum_threshold
def recommend_index(self, query: Query):
# Simple heuristic: index the first column in WHERE clause (simulated)
# In real system, would analyze predicates
if "WHERE" in query.sql:
import re
match = re.search(r"WHERE\s+(\w+)\s*=", query.sql)
if match:
col = match.group(1)
table = query.tables[0]
return (table, col)
return None
def build_index(self, table, column):
if table not in self.indexes:
self.indexes[table] = []
if column not in self.indexes[table]:
self.indexes[table].append(column)
print(f"🏗️ Built index on {table}({column})")
def materialize_view(self, pattern: str, query: Query):
if pattern not in self.materialized_views:
self.materialized_views[pattern] = query.sql
print(f"📦 Materialized view for pattern: {query.sql[:50]}...")
def execute(self, query: Query) -> float:
# Simulate execution with optional index or view
self.update_statistics(query)
pattern = self.normalize_sql(query.sql)
# Check if we have a materialized view for this pattern
if pattern in self.materialized_views:
print(f"⚡ Using materialized view for: {query.sql[:40]}...")
# Materialized view reduces execution time
actual_time = query.execution_time * 0.1
else:
# Check if index exists
idx = self.recommend_index(query)
if idx and idx[1] in self.indexes.get(idx[0], []):
print(f"🔍 Using index on {idx[0]}({idx[1]}) for: {query.sql[:40]}...")
actual_time = query.execution_time * 0.3
else:
actual_time = query.execution_time
print(f"🐢 Full table scan: {query.sql[:40]}... (time {actual_time:.3f}s)")
# After execution, if enough queries have been seen, consider tuning
if self.should_tune() and self.riboswitch_decision(pattern):
print(f"\n🔔 Quorum reached for pattern: {pattern[:50]}...")
# Tuning action: recommend index or materialize view
if query.selectivity < 0.1: # selective query
idx = self.recommend_index(query)
if idx:
self.build_index(*idx)
else:
# Less selective, materialize view
self.materialize_view(pattern, query)
return actual_time
# ----------------------------------------------------------------------
# Simulation
# ----------------------------------------------------------------------
def simulate():
engine = SelfTuningEngine(quorum_threshold=3, tuning_interval=2)
# Sample queries
queries = [
Query("SELECT * FROM orders WHERE customer_id = 123", ["orders"], 0.05, 1.0),
Query("SELECT * FROM orders WHERE customer_id = 456", ["orders"], 0.05, 1.0),
Query("SELECT * FROM orders WHERE customer_id = 789", ["orders"], 0.05, 1.0),
Query("SELECT * FROM products WHERE category = 'Electronics'", ["products"], 0.2, 0.8),
Query("SELECT * FROM products WHERE category = 'Books'", ["products"], 0.2, 0.8),
Query("SELECT * FROM products WHERE category = 'Clothing'", ["products"], 0.2, 0.8),
Query("SELECT * FROM logs WHERE severity = 'ERROR'", ["logs"], 0.01, 2.0),
Query("SELECT * FROM logs WHERE severity = 'ERROR'", ["logs"], 0.01, 2.0),
Query("SELECT * FROM logs WHERE severity = 'ERROR'", ["logs"], 0.01, 2.0),
]
# Execute each query multiple times to simulate workload
for i in range(5):
for q in queries:
time_taken = engine.execute(q)
# Simulate some delay
time.sleep(0.1)
print("\n=== Final Statistics ===")
print(f"Total queries processed: {engine.total_queries}")
print(f"Indexes built: {engine.indexes}")
print(f"Materialized views: {list(engine.materialized_views.keys())}")
if __name__ == "__main__":
simulate()- Normalize SQL – Replace constants with
?to create a pattern (e.g.,SELECT * FROM t WHERE x = ?). - Count patterns – Each query pattern increments a counter (quorum sensing).
- Riboswitch decision – When a pattern’s count reaches
quorum_threshold, the engine triggers tuning. - Tuning actions:
- For selective queries (estimated selectivity < 0.1), it builds an index on the first WHERE column.
- For less selective queries, it materializes the view.
- Execution – If an index or materialized view exists, the simulated execution time decreases.
- Periodic checks – Tuning occurs every
tuning_intervalseconds to avoid overwhelming the system.
🐢 Full table scan: SELECT * FROM orders WHERE customer_id = 123... (time 1.000s)
🐢 Full table scan: SELECT * FROM orders WHERE customer_id = 456... (time 1.000s)
🐢 Full table scan: SELECT * FROM orders WHERE customer_id = 789... (time 1.000s)
🔔 Quorum reached for pattern: SELECT * FROM orders WHERE customer_id = ?...
🏗️ Built index on orders(customer_id)
🔍 Using index on orders(customer_id) for: SELECT * FROM orders WHERE customer_id = 123... (time 0.300s)
🔍 Using index on orders(customer_id) for: SELECT * FROM orders WHERE customer_id = 456... (time 0.300s)
...
After the quorum is reached, subsequent queries benefit from the new index, reducing execution time from 1.0s to 0.3s. Similarly, for other patterns, materialized views are created.
- More sophisticated tuning – Add join order optimization, partition pruning, or automatic statistics refresh.
- Quorum decay – Older patterns gradually lose weight (like autoinducer degradation) to adapt to changing workloads.
- Cost model – Use actual execution time and resource usage (CPU, I/O) instead of simulated selectivity.
- Integration with PostgreSQL or MySQL – This algorithm could be implemented as an extension that monitors the query cache and issues
CREATE INDEXorCREATE MATERIALIZED VIEWcommands.
The colony’s riboswitch and quorum sensing principles provide a simple, effective mechanism for self‑tuning databases that adapt to real‑time workload patterns without manual DBA intervention. This is a practical step toward autonomous database systems.