diff --git a/.env b/.env
deleted file mode 100644
index cdcc756..0000000
--- a/.env
+++ /dev/null
@@ -1 +0,0 @@
-OPENROUTER_API_KEY = APIKEY
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index e69de29..0c2ad09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+.env
diff --git a/SQL/instructions_for_creation.md b/SQL/instructions_for_creation.md
new file mode 100644
index 0000000..1110b26
--- /dev/null
+++ b/SQL/instructions_for_creation.md
@@ -0,0 +1,2 @@
+# Para criar o banco:
+`mysql -u root -p -e "CREATE DATABASE IF NOT EXISTS pybot CHARACTER SET utf8mb4;"`
\ No newline at end of file
diff --git a/SQL/schema.sql b/SQL/schema.sql
new file mode 100644
index 0000000..1c73b4b
--- /dev/null
+++ b/SQL/schema.sql
@@ -0,0 +1,10 @@
+CREATE TABLE IF NOT EXISTS knowledge ( -- rows read by fetch_db_chunks in engine/database.py to build BM25 chunks
+ id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, -- the loader orders rows by this column
+ title VARCHAR(255) NOT NULL, -- prepended as the first line of every chunk built from the row
+ content TEXT NOT NULL, -- body text that the loader splits into word windows
+ category VARCHAR(100) NOT NULL DEFAULT 'geral', -- becomes the chunk source label "db:<category>"
+ active TINYINT(1) NOT NULL DEFAULT 1, -- only rows with active = 1 are selected by the loader
+ created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- filled in automatically on insert
+ updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -- refreshed automatically whenever the row is updated
+ INDEX idx_active_category (active, category) -- NOTE(review): the visible loader filters on active only; confirm category filtering is planned
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
\ No newline at end of file
diff --git a/__pycache__/app.cpython-314.pyc b/__pycache__/app.cpython-314.pyc
new file mode 100644
index 0000000..8d62d35
Binary files /dev/null and b/__pycache__/app.cpython-314.pyc differ
diff --git a/__pycache__/main.cpython-314.pyc b/__pycache__/main.cpython-314.pyc
new file mode 100644
index 0000000..cbfc496
Binary files /dev/null and b/__pycache__/main.cpython-314.pyc differ
diff --git a/api/__pycache__/__init__.cpython-314.pyc b/api/__pycache__/__init__.cpython-314.pyc
index c327023..77c4f5b 100644
Binary files a/api/__pycache__/__init__.cpython-314.pyc and b/api/__pycache__/__init__.cpython-314.pyc differ
diff --git a/api/__pycache__/routes.cpython-314.pyc b/api/__pycache__/routes.cpython-314.pyc
index af7eb8d..a002356 100644
Binary files a/api/__pycache__/routes.cpython-314.pyc and b/api/__pycache__/routes.cpython-314.pyc differ
diff --git a/api/routes.py b/api/routes.py
index 0ae7448..cb24c0f 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -8,6 +8,8 @@
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse, StreamingResponse
+from collections.abc import AsyncGenerator
+
log = logging.getLogger("kernelbots.api.chat")
router = APIRouter()
@@ -75,6 +77,28 @@ async def chat(request: Request) -> StreamingResponse:
detail="Campo 'session_id' deve ser string ou omitido.",
)
+ if user_message.strip().lower() == "/reload":
+ log.info("🔄 Comando /reload recebido — reconstruindo Ăndice BM25...")
+ services.search_engine.rebuild()
+ chunk_count = len(services.search_engine.chunks)
+ db_count = sum(1 for c in services.search_engine.chunks if c.get("source", "").startswith("db:"))
+ md_count = chunk_count - db_count
+ status = (
+ f"ĂŤndice reconstruĂdo: {chunk_count} chunk(s) total "
+ f"({md_count} de arquivos .md + {db_count} do MySQL)."
+ )
+ log.info("âś… /reload concluĂdo — %s", status)
+
+ async def _reload_stream() -> AsyncGenerator[str, None]:
+ yield f"data: {status}\n\n"
+ yield "data: [DONE]\n\n"
+
+ return StreamingResponse(
+ _reload_stream(),
+ media_type="text/event-stream",
+ headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no", "Connection": "keep-alive"},
+ )
+
built = services.context_manager.build_messages(
user_message,
discipline_filter=discipline,
diff --git a/app/__pycache__/__init__.cpython-314.pyc b/app/__pycache__/__init__.cpython-314.pyc
index b4a0e4f..db1147b 100644
Binary files a/app/__pycache__/__init__.cpython-314.pyc and b/app/__pycache__/__init__.cpython-314.pyc differ
diff --git a/app/__pycache__/factory.cpython-314.pyc b/app/__pycache__/factory.cpython-314.pyc
index b32b8b8..9c43984 100644
Binary files a/app/__pycache__/factory.cpython-314.pyc and b/app/__pycache__/factory.cpython-314.pyc differ
diff --git a/app/__pycache__/state.cpython-314.pyc b/app/__pycache__/state.cpython-314.pyc
index 58d7902..98972be 100644
Binary files a/app/__pycache__/state.cpython-314.pyc and b/app/__pycache__/state.cpython-314.pyc differ
diff --git a/core/__pycache__/__init__.cpython-314.pyc b/core/__pycache__/__init__.cpython-314.pyc
index 2c6cbb1..9cbd5b2 100644
Binary files a/core/__pycache__/__init__.cpython-314.pyc and b/core/__pycache__/__init__.cpython-314.pyc differ
diff --git a/core/__pycache__/config.cpython-314.pyc b/core/__pycache__/config.cpython-314.pyc
index 463a9a0..e40be64 100644
Binary files a/core/__pycache__/config.cpython-314.pyc and b/core/__pycache__/config.cpython-314.pyc differ
diff --git a/core/__pycache__/logging_config.cpython-314.pyc b/core/__pycache__/logging_config.cpython-314.pyc
index 69ccfac..c990bc2 100644
Binary files a/core/__pycache__/logging_config.cpython-314.pyc and b/core/__pycache__/logging_config.cpython-314.pyc differ
diff --git a/core/config.py b/core/config.py
index 096b381..a20593a 100644
--- a/core/config.py
+++ b/core/config.py
@@ -27,6 +27,11 @@ class Settings:
pinned_max_turns: int
pinned_max_chars: int
pinned_weak_score: float
+ db_host: str
+ db_port: int
+ db_name: str
+ db_user: str
+ db_password: str
@property
def openrouter_headers(self) -> dict[str, str]:
@@ -87,6 +92,23 @@ def load(cls) -> Settings:
raise RuntimeError("ACL_PINNED_WEAK_SCORE deve ser um nĂşmero.") from None
pinned_weak_score = max(0.05, min(0.95, pinned_weak_score))
+ """ !Credenciais do banco! """
+
+ db_host = (os.getenv("DB_HOST") or "").strip()
+
+ db_port_raw = (os.getenv("DB_PORT") or "3306").strip()
+
+ try:
+ db_port = int(db_port_raw)
+ except ValueError:
+ raise RuntimeError("DB_PORT deve ser um inteiro.") from None
+
+ db_name = (os.getenv("DB_NAME") or "").strip()
+
+ db_user = (os.getenv("DB_USER") or "").strip()
+
+ db_password = (os.getenv("DB_PASSWORD") or "").strip()
+
return cls(
openrouter_api_key=key,
project_root=project_root,
@@ -100,4 +122,9 @@ def load(cls) -> Settings:
pinned_max_turns=pinned_max_turns,
pinned_max_chars=pinned_max_chars,
pinned_weak_score=pinned_weak_score,
+ db_host=db_host,
+ db_port=db_port,
+ db_name=db_name,
+ db_user=db_user,
+ db_password=db_password,
)
diff --git a/engine/__pycache__/__init__.cpython-314.pyc b/engine/__pycache__/__init__.cpython-314.pyc
index 5418816..ece1cbe 100644
Binary files a/engine/__pycache__/__init__.cpython-314.pyc and b/engine/__pycache__/__init__.cpython-314.pyc differ
diff --git a/engine/__pycache__/chat_provider.cpython-314.pyc b/engine/__pycache__/chat_provider.cpython-314.pyc
index 554a383..347a60b 100644
Binary files a/engine/__pycache__/chat_provider.cpython-314.pyc and b/engine/__pycache__/chat_provider.cpython-314.pyc differ
diff --git a/engine/__pycache__/context.cpython-314.pyc b/engine/__pycache__/context.cpython-314.pyc
index 4c471d1..b368c69 100644
Binary files a/engine/__pycache__/context.cpython-314.pyc and b/engine/__pycache__/context.cpython-314.pyc differ
diff --git a/engine/__pycache__/database.cpython-314.pyc b/engine/__pycache__/database.cpython-314.pyc
new file mode 100644
index 0000000..921aae6
Binary files /dev/null and b/engine/__pycache__/database.cpython-314.pyc differ
diff --git a/engine/__pycache__/pinned_store.cpython-314.pyc b/engine/__pycache__/pinned_store.cpython-314.pyc
index 953f467..5834e63 100644
Binary files a/engine/__pycache__/pinned_store.cpython-314.pyc and b/engine/__pycache__/pinned_store.cpython-314.pyc differ
diff --git a/engine/__pycache__/search.cpython-314.pyc b/engine/__pycache__/search.cpython-314.pyc
index 44dd406..7ab519c 100644
Binary files a/engine/__pycache__/search.cpython-314.pyc and b/engine/__pycache__/search.cpython-314.pyc differ
diff --git a/engine/__pycache__/watcher.cpython-314.pyc b/engine/__pycache__/watcher.cpython-314.pyc
index 7162436..7679849 100644
Binary files a/engine/__pycache__/watcher.cpython-314.pyc and b/engine/__pycache__/watcher.cpython-314.pyc differ
diff --git a/engine/database.py b/engine/database.py
new file mode 100644
index 0000000..f20c798
--- /dev/null
+++ b/engine/database.py
@@ -0,0 +1,84 @@
+"""Fonte de dados MySQL para o índice BM25."""
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from core.config import Settings
+
+log = logging.getLogger(f"kernelbots.{__name__}")
+
+DB_CHUNK_WORDS = 500  # maximum number of words in one chunk window (see _chunk_text)
+DB_CHUNK_OVERLAP = 50  # number of words shared by two consecutive windows; must stay below DB_CHUNK_WORDS or the window never advances
+
+
+def _chunk_text(text: str, title: str, source: str) -> list[dict]:
+ """Split text into windows of up to DB_CHUNK_WORDS words that overlap by DB_CHUNK_OVERLAP words; return one chunk dict per window."""
+ words = text.split()  # whitespace split: original spacing and line breaks are not preserved in the chunks
+ if not words:
+ return []  # empty or whitespace-only text produces no chunks
+ chunks: list[dict] = []
+ start = 0
+ while start < len(words):
+ end = min(start + DB_CHUNK_WORDS, len(words))  # clamp the last window to the end of the text
+ chunks.append({
+ "text": f"{title}\n" + " ".join(words[start:end]),  # every chunk starts with the title on its own line
+ "source": source,
+ "discipline": "db",  # all chunks built here carry the fixed "db" discipline
+ })
+ if end == len(words):
+ break  # last window emitted; without this an extra chunk holding only overlap words would follow
+ start += DB_CHUNK_WORDS - DB_CHUNK_OVERLAP  # step forward so the next window repeats the last DB_CHUNK_OVERLAP words
+ return chunks
+
+
+def fetch_db_chunks(settings: Settings) -> list[dict]:
+ """
+ Fetch the active rows of the knowledge table and return them as a flat list of BM25 chunk dicts.
+ Returns [] when the DB settings are incomplete (debug log), PyMySQL is missing (warning) or anything in the load fails (warning).
+ """
+ if not all([settings.db_host, settings.db_name, settings.db_user]):  # db_password is not part of this check, so an empty password is accepted
+ log.debug("Variáveis DB_* não configuradas — pulando fonte MySQL.")
+ return []
+
+ try:
+ import pymysql  # imported here rather than at module level so a missing driver only disables this source
+ import pymysql.cursors
+ except ImportError:
+ log.warning("PyMySQL não instalado — fonte MySQL desativada.")
+ return []
+
+ try:
+ conn = pymysql.connect(
+ host=settings.db_host,
+ port=settings.db_port,
+ database=settings.db_name,
+ user=settings.db_user,
+ password=settings.db_password,
+ charset="utf8mb4",
+ cursorclass=pymysql.cursors.DictCursor,  # rows are read by column name below
+ connect_timeout=5,  # NOTE(review): presumably seconds — confirm against the PyMySQL docs
+ read_timeout=10,  # NOTE(review): presumably seconds — confirm against the PyMySQL docs
+ )
+ with conn:  # NOTE(review): relies on the connection context manager to close the connection on exit — confirm for the installed PyMySQL version
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT id, title, content, category "
+ "FROM knowledge WHERE active = 1 ORDER BY id"  # static query, no user input; only active rows, in id order
+ )
+ rows = cursor.fetchall()  # loads the whole result set into memory at once
+
+ all_chunks: list[dict] = []
+ for row in rows:
+ source = f"db:{row['category']}"  # source label is "db:" followed by the row's category
+ chunks = _chunk_text(row["content"], row["title"], source)
+ all_chunks.extend(chunks)
+ log.debug(" 🗄 row id=%s '%s' → %s chunk(s)", row["id"], row["title"], len(chunks))
+
+ log.info(" 🗄 MySQL: %s row(s) → %s chunk(s) carregados", len(rows), len(all_chunks))
+ return all_chunks
+
+ except Exception:  # deliberate best-effort boundary: any failure above degrades to an empty result instead of raising
+ log.warning("⚠Falha ao conectar ao MySQL — continuando apenas com .md.", exc_info=True)  # NOTE(review): also reached on query or chunking errors, not only connection failures
+ return []
\ No newline at end of file
diff --git a/engine/search.py b/engine/search.py
index ebf036a..5a10203 100644
--- a/engine/search.py
+++ b/engine/search.py
@@ -12,6 +12,8 @@
from rank_bm25 import BM25Okapi
from core.config import GlobalContextMode
+from core.config import Settings
+from engine.database import fetch_db_chunks
log = logging.getLogger(f"kernelbots.{__name__}")
@@ -26,10 +28,12 @@ def __init__(
content_dir: Path,
score_threshold: float,
global_context_mode: GlobalContextMode = "geral",
+ settings: Settings | None = None, # <-- adicionar
) -> None:
self._content_dir = content_dir.resolve()
self._score_threshold = score_threshold
self._global_context_mode: GlobalContextMode = global_context_mode
+ self._settings = settings
self._lock = threading.RLock()
self._silos: dict[str, dict[str, Any]] = {}
self._discipline_ids: frozenset[str] = frozenset()
@@ -151,6 +155,15 @@ def rebuild(self) -> None:
log.warning(
"⚠Nenhum .md indexado — BM25 desativado. Modo assistente geral ativo."
)
+
+ # --- chunks do MySQL (silo "db") ---
+ db_chunks: list[dict] = []
+ if self._settings is not None:
+ db_chunks = fetch_db_chunks(self._settings)
+ if db_chunks:
+ tokenized_db = [self._tokenize(c["text"]) for c in db_chunks]
+ new_silos["db"] = {"chunks": db_chunks, "bm25": BM25Okapi(tokenized_db)}
+ all_chunks.extend(db_chunks)
elapsed = (time.perf_counter() - t0) * 1000
with self._lock:
@@ -158,11 +171,11 @@ def rebuild(self) -> None:
self._silos = new_silos
self._all_chunks = all_chunks
+ db_count = len(db_chunks)
+ md_count = len(all_chunks) - db_count
log.info(
- "✅ Índice BM25 por silo pronto — %s chunk(s) | %s silo(s) | rebuild em %.1fms",
- len(all_chunks),
- len(new_silos),
- elapsed,
+ "✅ Índice BM25 por silo pronto — %s chunk(s) (%s .md + %s MySQL) | %s silo(s) | rebuild em %.1fms",
+ len(all_chunks), md_count, db_count, len(new_silos), elapsed,
)
def normalize_discipline(self, raw: str | None) -> str | None:
@@ -225,7 +238,10 @@ def search(
return self._hits_in_silo(nd, query, top_k)
if self._global_context_mode == "geral":
- return self._hits_in_silo("geral", query, top_k)
+ hits = self._hits_in_silo("geral", query, top_k)
+ hits += self._hits_in_silo("db", query, top_k)
+ hits.sort(key=lambda h: h["score"], reverse=True)
+ return hits[:top_k]
merged: list[dict] = []
for silo in sorted(self._silos.keys()):
diff --git a/main.py b/main.py
index a8efe52..b45637f 100644
--- a/main.py
+++ b/main.py
@@ -24,6 +24,7 @@
settings.content_dir,
settings.bm25_score_threshold,
settings.global_context_mode,
+ settings=settings,
)
observer = start_content_observer(search_engine, settings.content_dir)
diff --git a/requirements.txt b/requirements.txt
index f93aa6e..bbba313 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ python-dotenv
jinja2
rank-bm25
watchdog
-pytest
\ No newline at end of file
+pytest
+PyMySQL
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
index 9e63d6c..f7c13e9 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -32,6 +32,7 @@
content/doc), /python, /visualizacao-sql,
/projeto-bloco, /planejamento-curso-carreira (RAG sĂł na disciplina).