From 48252da8cb4c2689159d709d56b7f04ecf0b5434 Mon Sep 17 00:00:00 2001 From: Gabriel Diniz dos Santos Date: Wed, 8 Apr 2026 00:18:28 -0300 Subject: [PATCH 1/4] feat: integrate MySQL as BM25 knowledge source --- .env | 8 +++++- api/__pycache__/__init__.cpython-314.pyc | Bin 195 -> 172 bytes api/__pycache__/routes.cpython-314.pyc | Bin 4864 -> 6578 bytes api/routes.py | 24 ++++++++++++++++ app/__pycache__/__init__.cpython-314.pyc | Bin 300 -> 277 bytes app/__pycache__/factory.cpython-314.pyc | Bin 3019 -> 2996 bytes app/__pycache__/state.cpython-314.pyc | Bin 1058 -> 1035 bytes core/__pycache__/__init__.cpython-314.pyc | Bin 240 -> 217 bytes core/__pycache__/config.cpython-314.pyc | Bin 4868 -> 6100 bytes .../logging_config.cpython-314.pyc | Bin 1026 -> 1003 bytes core/config.py | 27 ++++++++++++++++++ engine/__pycache__/__init__.cpython-314.pyc | Bin 253 -> 230 bytes .../__pycache__/chat_provider.cpython-314.pyc | Bin 7378 -> 7355 bytes engine/__pycache__/context.cpython-314.pyc | Bin 22856 -> 22833 bytes .../__pycache__/pinned_store.cpython-314.pyc | Bin 5061 -> 5038 bytes engine/__pycache__/search.cpython-314.pyc | Bin 15048 -> 15987 bytes engine/__pycache__/watcher.cpython-314.pyc | Bin 4954 -> 4931 bytes engine/search.py | 26 +++++++++++++---- main.py | 1 + requirements.txt | 3 +- templates/index.html | 2 ++ 21 files changed, 84 insertions(+), 7 deletions(-) diff --git a/.env b/.env index cdcc756..503d971 100644 --- a/.env +++ b/.env @@ -1 +1,7 @@ -OPENROUTER_API_KEY = APIKEY \ No newline at end of file +OPENROUTER_API_KEY=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 + +DB_HOST=localhost +DB_PORT=3306 +DB_NAME=pybot +DB_USER=root +DB_PASSWORD= \ No newline at end of file diff --git a/api/__pycache__/__init__.cpython-314.pyc b/api/__pycache__/__init__.cpython-314.pyc index c32702309d8103c14e1b67941b160072573a3ab5..77c4f5b24fa375fc73047c1e190cbb629fbc5b56 100644 GIT binary patch delta 44 ycmX@ixQ3Ben~#@^0SLBixtb|Dk=I99*TpI(peR2pwIshdCZMt;BR_9qvjYGTCJq(= delta 67 zcmZ3(c$kq_n~#@^0SK)7u4al%ej!92UOiGFI&r5YF$}CHbami0E V%}vcK$uEv^&n)pMO`4eF005cu7D)gA diff --git a/api/__pycache__/routes.cpython-314.pyc b/api/__pycache__/routes.cpython-314.pyc index af7eb8d183b5eda987a64b550b1ba04828b54b1c..a002356db7f4dcb10d5fc25b531dc5d02d0e426d 100644 GIT binary patch delta 2717 zcmaJ@U2Gf25#GC_#N$6HQsR$k@kGfIX*rTAnUyRkQXJWC)5tD;Qqm+AO`MJw>EaVl z-aE#zUMSQ_;64ZS;az9~%rH$Wdc zdlYH8ak~I#=VoVSXJ@~eA?MJD)L~qOauT$;k*qoDhjM8->?+hc} z6-J7`=_-`-XZ!a%+0%449Pu8o@!n_C!PDI+?K8$Gtu0NN-s9TkqWCSTC}BZYvGFGL zi(vnW@%b!PpObV|o6la>m(-&1GM#E^1NZHloAecm@))DG>_Z+9m<~~qrJ|mfSMWjj z))*kY9ESsB(YB;6OPB*TWV~y8S9Al1i5YGo#7l^s5Fd~Mq;$R!XThp>Dj6n_vv#47$3&-`FRcg{0;57vAqpWr#Zi%;_1 ze9s`m9~vbY(5OEOWp@m8@QixT(f>EX=4z<{_{=2nEH4n`27zIYxO3$SPo zoq{A^3}l;n5lht~JmRUw{bU*ARcEN&fI#1#)*@s+jvj~3aGrVrt+gc37QBbtWf#%8_=xaxS`~xbs(jixqQ|o7cpV`WM6sgQUu|54{ zZCa>K3zdb%+Vs-v7q4I3PELKLv0(81)G_Mk&SO+53?DasSkGKDpEuqrsN>vffD@NW z#S1Ad&FN)T7ZfhF8srKRC*X@E`FU0229^t4Key`QApfT?&z_x050pp~`SeFGHLncr zNK{J1rFChCH0x#>StmyhlADPGvxc5T=jpK*p=DUvJxeW7JjK(4RCgJr8EiE+(neOJ z_jlBDfPy338MP?o{$GSpoW=@tquyFZgNdnthN>eU4R7BPUot(&YwH+-7_b1 zPUo4u_yIZ)S4p$_^coJFhh0o>zFr@BIIbitoALiT&V8%afewJ#zX&>ZLb3I4uBSG7 zDs6q+?!KM2o?GsouQVI1UkQXLW8R}>XfsfdHBnws+R#tQg;}mGX6V{?*L6p*+ zm;eToQo+PGjvIgVB*IZ(P5UJQ7vmg6(R5+q zQeKlZO;(FmuqLIMF3VM3R7*wOWD5%oZn;qBaE6$+kj-^WnxIDBM0im5k4N@iSCVA> z46GA~YALvQE2DReEn8%B)b|oqW|OwL{)W)skb? z&vGRWckl0^=8J%FLNVZ&p0Z2;7h? zw^#`+eQCG(ox8~9aPGQ|_x%d}1Z|`Ple1TjyH3ZcEnje^4{ar>nI79#w~g?9VIrT3 z0R1p>{4h+vVU7>N^xKhxKz|oF6@=;I9txB{?i)TGW&RjtK#~p{?*(#YpQtK|BI<3>`MEw>=Xg R|AW3@5Z&=lny{A7{{ZdPR0se7 delta 1031 zcmY*XO-vI(6rS1Lw)Ahswy@>@r&uV65Ts&^knk61LbO@L#H0-=Wm6VQx0%_3H;9Cr z9LQX}dh}!zPkJyVYQjwq#JC5dXx4xG+4eMQ%*|p=ADfeTXj;7hgi`b=t#QM zsv)6FP@M`=T?)!ppS|z_u@i{?kh>C7^#IHJA1j~=Arz6>1g2~sKkptJzmb@u8lj0y zDz&hfxSi7`h`y>NCX94pMa!86O-$+LbYX#Q3thI0@V$Tcm9UAihmA<_UJn>7XF96` zbzaYtMz~WEAl%wYs<~5IGFCLw0;-R_mR_#AK_mPbv2pNnaBz?TBA8qNL3vZ!OYaH) zvIGs>!Q*V8KWG$V`*8^$JB~{7Lr1i1AU@!vPumJALVN(52;q}o62fTr*!D&gg?KA_ zBKmQNZM*C00z*7!IC)p%QdvXjjt~!XPH&`QIhS`_@$U0mMFZ<)hFyW~vpLnOiW*ew z)Oxi+ZR``(rpiYlJIVIc2|1F@*M9~Hq8sbRChw}I&jPYmG p@*TB*L-H5o{DGn$QS=ZEf3 StreamingResponse: detail="Campo 'session_id' deve ser string ou omitido.", ) + if user_message.strip().lower() == "/reload": + log.info("🔄 Comando /reload recebido — reconstruindo índice BM25...") + services.search_engine.rebuild() + chunk_count = len(services.search_engine.chunks) + db_count = sum(1 for c in services.search_engine.chunks if c.get("source", "").startswith("db:")) + md_count = chunk_count - db_count + status = ( + f"Índice reconstruído: {chunk_count} chunk(s) total " + f"({md_count} de arquivos .md + {db_count} do MySQL)." + ) + log.info("✅ /reload concluído — %s", status) + + async def _reload_stream() -> AsyncGenerator[str, None]: + yield f"data: {status}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse( + _reload_stream(), + media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no", "Connection": "keep-alive"}, + ) + built = services.context_manager.build_messages( user_message, discipline_filter=discipline, diff --git a/app/__pycache__/__init__.cpython-314.pyc b/app/__pycache__/__init__.cpython-314.pyc index b4a0e4f46cd0b294809b83d1f9c00ee336ce0c02..db1147b4964a3e2ab5f657cf9b37ffd04e7c8c3a 100644 GIT binary patch delta 44 ycmZ3(G?j^0n~#@^0SLBixtdu#k$1JQu8UPnKv8~HYDs=^Oh9ExMt}5inl$mW7XY!u7d!v} diff --git a/app/__pycache__/factory.cpython-314.pyc b/app/__pycache__/factory.cpython-314.pyc index b32b8b8753180ac548c18d3f85f3d46fa13eb1f9..9c439849ec6f429ebbf9dc8216f4e9a402a5687f 100644 GIT binary patch delta 47 zcmX>tzD1l@n~#@^0SLBixte)nBkvzZVI3E%n1G`EtkjbH;+TNSl8pSk&BjbKI00%` B5E=jg delta 70 zcmdlYep;MYn~#@^0SIPKxth6VBkvzZEoWz|n9$ej!92UOiGFI&r5YF$}CHbami0E V%}vcK$uEv^&n)pMO`15v5dgLG7VZE5 diff --git a/core/__pycache__/config.cpython-314.pyc b/core/__pycache__/config.cpython-314.pyc index 463a9a02961bf2529e76063ce7e15a7fb31d6703..e40be64125f1a0e466ce4503ae89308b9cf5018c 100644 GIT binary patch delta 1570 zcmaKsTWB0r7=X{&+0D-GWG^#&zh*DlP19VPY!VxDF`Ck7H%*v~L?JOG&Dx|*vdiqI zAtFXlFrW{m=OIO3D)>+YMcEf03yKOxNLeMih*&5-mPs3^=!^e9+t#$A2j)B9{O6qi zoVoD7o&KT2-eR{|3A8V6{J2~cKes_e`X(_+PKIMjKs(gpzDQ#r; zjTvC8X@EAdWz)dL7#vBBBP@snpqZ*k=!z7OP#ZCvLf{j%!&USG-X!;lLQU2KotIy# z^=cCgox_C>n;C>7#By)9u57F0DD5eW=JyRW#NOZ&G{8ROLo~?ZeArMB*cKm$0StvP z6fD$-$c*n{Dy2U2AalW{PmLa3ZP!B$M?(%h6#FN%Rd6aKPUPMWBF7cFK;?vD045a< z*eIufX~hg|l8*tK6(jJtVgk0vt-v;g2evB~V25G_b}9n!gkl4BDRyAD;s9n8=K?j7 zB#Dt3|AR+hzgl+Kur*F2Y~JdjedP`7X~T5YJo@b1*u<1lCAv;d$V%1N-QC&yS{O#~ zHny0%l#9U^kFBr9me)$TWi{WfPd+;`eoB`Yr%uUL0q2iQO-)b8qm0_zuJf@zC9Z57X}1`t6+2oXj^5K-1|E12|Og?((>pdw4y z$AOx%E_xQ zS6of|uC_f_+x4z}DRWcGRHWW*^KD;XXYRR%S?q-`^kpi3rk{Le_8`0avL5m^?aOwOZ(D_N zfqd8R&q~~`$DeKEc02v!JomleA5U^>f>CkW)dC5s!nqZwS*tmyPK&C{E{dTj)*Zcg z)as31ix3XDitn+H#PQ%yHs@Q}Cn35kMmIYU0|_if_c&4~9vV<<$uLE;LG8xu8FpNX q(Ghk=8Vf$YU6mhL%`dL6V5pAUxu zh-u^H*LouSDtry&H*^(0xw_>w;u123CDe${LIqQMV9RHSESor!^&3)KFyt|$DcvF$ z!}KA)H?_Q6u-5G=v8~$n_D;oG+pn&W`rWFv(kgeF)%|wMwo3K3-r1o&ehI7Hg2#_> zhAw+&8Si@+JUKRzS0cAVi6`i&O7fn&ha zP4^(+T86HFim-r3NI2Q`bgcm%<5c&K?-@64qAW78$(g*Nm_B2~^q5{FY5GmU3>c$x zgKj@v8+^fO>F$M4i4$tOuD&nP<4As`)*o>Dx;99HEGE!FLI4;^Vt^q4d@30RhyV#7 z0~ERyJ)obX_i&J|#`GZ@d?wWYA{qKACi^B?)}GjFM z-ZP(DxdiP diff --git a/core/__pycache__/logging_config.cpython-314.pyc b/core/__pycache__/logging_config.cpython-314.pyc index 69ccfacf0ef1fc36184690a102165731cc4cf944..c990bc299e1b34c8012cac7829610e0fda5d9abc 100644 GIT binary patch delta 47 zcmZqTc+JkM&Bx2d00djMT+Mv2kyngS*u=#uCZH%kE43uQI3}R7BqKj>voGUTCICw> B4?+L{ delta 70 zcmaFO-o(MH&Bx2d00gXkS2NFV dict[str, str]: @@ -87,6 +92,23 @@ def load(cls) -> Settings: raise RuntimeError("ACL_PINNED_WEAK_SCORE deve ser um número.") from None pinned_weak_score = max(0.05, min(0.95, pinned_weak_score)) + """ !Credenciais do banco! """ + + db_host = (os.getenv("DB_HOST") or "").strip() + + db_port_raw = (os.getenv("DB_PORT") or "3306").strip() + + try: + db_port = int(db_port_raw) + except ValueError: + raise RuntimeError("DB_PORT deve ser um inteiro.") from None + + db_name = (os.getenv("DB_NAME") or "").strip() + + db_user = (os.getenv("DB_USER") or "").strip() + + db_password = (os.getenv("DB_PASSWORD") or "").strip() + return cls( openrouter_api_key=key, project_root=project_root, @@ -100,4 +122,9 @@ def load(cls) -> Settings: pinned_max_turns=pinned_max_turns, pinned_max_chars=pinned_max_chars, pinned_weak_score=pinned_weak_score, + db_host=db_host, + db_port=db_port, + db_name=db_name, + db_user=db_user, + db_password=db_password, ) diff --git a/engine/__pycache__/__init__.cpython-314.pyc b/engine/__pycache__/__init__.cpython-314.pyc index 541881647de7612505a24101a5bf9fd944d6c47f..ece1cbe89edab343eba66180cd094f353b0643ba 100644 GIT binary patch delta 44 ycmey%_>7TPn~#@^0SLBixti%Yk+)gcz{M&ipeR2pwIshdCZMt;BR_BAE@uEQ`42Jx delta 67 zcmaFH_?MAan~#@^0SLJIu4Z~nej!92UOiGFI&r5YF$}CHbami0E V%}vcK$uEv^&n)pMO`5pa834k*7c>9> diff --git a/engine/__pycache__/chat_provider.cpython-314.pyc b/engine/__pycache__/chat_provider.cpython-314.pyc index 554a383dd3056d0e6ffb59671a968b8cc056f59b..347a60b329c764edfc216b5c6cdea20703186356 100644 GIT binary patch delta 47 zcmca)x!aOgn~#@^0SLBixth6gBQF<|u&IkxOh8e7R%%IpaZEsENk)F&W=E!pQUF`z B4|MPD>6Iz^FR2-9@n3$9jb8*s|qnW~}2zzT0fV`Yu*60Y&*)sU`WvF#(k&8Tomeh1pGm E0HBf(#sB~S delta 73 zcmdnEiSfiHMm}vmUM>b82w8bGbE*ACzT0eCZq8OQp~b01#WCrLiAgCj{&}e`MVV!( cF)sPZrManjCHciM?wKVXrAeFru$cw{0F?6@a{vGU diff --git a/engine/__pycache__/pinned_store.cpython-314.pyc b/engine/__pycache__/pinned_store.cpython-314.pyc index 953f4673bc464a1bb09744fd8dd5471e5ab41462..5834e63b9e48434a4d2cc1d8835545308a542265 100644 GIT binary patch delta 47 zcmX@AzD}K2n~#@^0SLBixte)rBQFoLu!)OROh8e7R%%IpaZEsENk)F&W@qLm0RUDB B4^998 delta 70 zcmZ3depH=Tn~#@^0SFvcT+Qs?$jig5lpf%mqw8ADRny69@wu-<6+N2c*9iUBWNxOz8x^)xNw*OKJD|Fh&IoELr z+oZB9`S;E}_ug~wJ@^o`ncW4Up-zUmpsTxrvAJw4%HFh zfHwC#x8$q&0d7=3R7Ee8xeFz6)MevZb!|MZ<~Y?oY8a}+p^#)*Wfpq4a!#^(Imy?d z-aw-T#WZuf%_51CrAYvNSZT9aO-Px!vZiGbDGTSSShgZ% z<1F@Tc|cu<^;}#)Txs}Xa9;8HAL&Dedwe@Kr3|^D5oR>l8?wQpm-cfhQ*Nw@i*PA( zt~cV!QMqS=(bd(BIMKM3aE?@GOKWV3%j4iyMj(EDZ+fZf!z)j0|Qc-s&l5usExPcMoi3dIkzH0?31SDPr zA3y{A6udG%gpZ-G9{Ds$i)XBw;|k0^BZW74c=}+u|@R_cmR#JXhtPHN#ZiO zoAoQhHccdQED>ebG$^V(K}zU99W{Ix4LR$aci~3NtZ)J8qz^f_@rCpyXDh#c;ZM$| z`Mc4GZ+VoRFlp5YOhVZ;4&Y>peTaYIje>9U_EMOm8IwvhktC#=KIje=q8VuVWJ1yS zjDk7|1%qG^PrJMLo%FK%NY*foS+k(ec#6w!rIQ%dJf)zMPd%EBkJ3e7w~v_vPvRi{ zG_UbftCkS<_tj4TS3%c3%$+^(tAlSGB=}?yn)UnTY4Q-#BEgfB7=6lLUa$vA6h#s^ zCXg`wwf_zyx`gJX%|%{XUKHYE3tNf``0VM&R*jmVm)ug72Vv5xD(@7)kpND(*@qpD zMOc``ng|Gr6R%GR3;!xUr)zBFG7g9;X!rloWH@9AH-b864QJ8grA2O7;6T5`mKeZE z5eL0edQYV@W0MRKha`+tW=A@MGCSAlJndoR)G4G&1E9-HY8`gx!|ogA0sHt0*aBtNMOaFe=>EeW_n(c__@cq}^{w$OWmJ5A_e zgQSI?3huOOy2PI3UqJDbG8M|(3TmsBVBC|Sj4w(n(-^+LJdJITS zT&+hmo0{G)r;>-{1QaQnNgj`76gi=paR%N-GfyP4l!BfK?)zUYn=WeUOoiXXZ2NxQ z!m||{`58#b<(0GIlGSmn`)K!j*6I&@HS>m_*iYLReI2v?zqh*I-?(M5xc{~3SIeIr zd}?sMWj^v^=Wk2eUz?uYdcL@SDNr--KePGg{b$D)yN1t~ZJ+I*OB}s#$yRvTR(io! zdLntzR{NpLd)Za>o~vrn6`C)&qH!Fi(ZtvVQ?VdG%;3}LuL^qz? zdC7Gf^vu>{=A-5%oAQCpF=u(=z7so7PQC0p^T0ycS?}Kjy=eLL<365tRW){h_E8yE zGQi)kaCYCZT{o3_(8B8#cbWRz4Zmy&4g`$nT)}}V!`lwmKv;OEy<(um^lmYa7`6^{ z(idvi_3E0iJTwrN9`q#k$W05RpOq>on+|Rj^uIO;j=Duie z|M1=YLtFa?`=U}G8G;dV1Ys5doth;`*c_t*`98uf08QARNl;Q9;fN)+`FHdriDaenI{Q^DTvcZSXtYD1A(s9*kK|PhNjr<~&TK}9Y zUixO3n%G)ap}c@nUPM4`|G)43A*@ka0kB>y6e_METy+#I3-E=~-RW93A!X)DLdzmj zRtslwU$X)U^<-X~pO%LbjsMReR`io6VRJb_@*(G*;>=*6k>0)59*% z8t|GcEh9t(+MMS$Xa8JD0)4l=wg8f*ab#^#ZlgUe7q!)g1Teddi5|9l>2!M`e}K-l zm-0rG;K#pWl0gzW@LvP802eVzTtvsy=i0pVt75GsY#L<7OlD|cT}v6%y)VN&Rsmoo zGM1}EkeQ~bb#?yhK(5%xJU?IaV@uXX$V(_-hP;(qPR`-PwI+rmZv%XW0~qxWb0=f- zrc)1R3!1g=bQVUL6T!cVG=ahw`*2T=JddCPgw0vA$r=oD;i+pmatcVzFqTvm%?PHp z5Bv+=hi#ZMB(R~Pj-c^F9=+*`{=2T`*)YK*m`W&6A)jqI{R)-uSa;@)Z>EB z8`VA4@Ojf??G1@=YRqlfdW}{7EL2n?k9kB8DZDBCJ-{e4NbkF&f2Iy7&72`=$kOUG zVKE4EqGnUnn3{}F$nd{2k-%M68OG#{s^{y>EEnh5`SmR?z0oUE87h`{Xg+A>@BjVHJu$DRKy| z8x`OS*U@jXvQR+SM=rugvqYnNC)G(J!)|DpnhC0ieOM6S-(^ycaG3tCbJIRz!eNJ; zf>F6K5mRG(Vv5Wv22F%evlZ4sI6sdoj^I>NJUx*~j?3g3WU;fmk-|4&_tDH<6Y0d{ kxZF+t06a8LWrRMwK4iSY1+Q@RbZ&jv5!dr@c0{)D-`)M!Jpcdz delta 3646 zcmb7GYitzP6`t9h+1=S)@58%(tzYZ4S+jO91{N^bv3a>M1dKfalVCf`vOAcK*SqG< zm_U*iq@;-mQYoCER8iW}B&tA7l}c{YCTc4vDxj!UK~dUdRJEvo(*CJz5$cNm={a|7 z2mf+1BYpSWbKmFj-FxqaTT{MPUx{15@8utSl8ZN-_f?9w&Npk)sC}Rg4tdJToTCA+ zu-29osgocW!BJ_TiCR{;7ImRAp+QiU3PE)zZS(!Ys;yB-c)%;Z1n-GEPL&yURWu~E zMzvu-dxek>paxgjaoNqJxjD6)r-0k z(ivXWS0VJPeqJe21K1GcvQI5VImBhZ8b-N{%K^0<1N7UqN!xnh>!ZduigjOC0&h}6$GNpcBwt7jCF+=AOT>p|%rSt%nC z&VOWbyC5j{{*1zDxZ7M0f0UZMBd&!G18#I!7|zwWYKDY@V?n)Iz?yTRHWGxba;REa zsJBro4-L``2?G^yEfn$83#!m4ToL;dUU*I33U_6-K{6#>%O7DL?8v+XJ_Mci7{`x= z;3-E_2|x27>P`zYUU=0}3-3DCh~@A%N7qJLhxrj6xbr#P7|!N0#wZJ-A|bL;EOd&$ z7z-h?FhLo@N*Hw3;0y)lTu(jLO^F7xMij4zYy}p47;bl^sJ)xyp=4fwW zELYIlEM+si<2vrJb^%c(+`Zh=Z8SMeOAfTjD`yEV`FO!-M-^~Jd8L-loUk$49E%Y= zg0QeDSmkaPSHaV6wv|>kB||%5n07n?H^p-bHw9Lgx5PRKdK%FZ>pf?@>mOd?BTrmh z3$vb>RE}zBY407h)|yNkM;S(qz`bU=vpGGJo6Mt0*ap&)GgyLPIa6$epL)@c(|`A- z>_J-G6y2snFHW(H#(H3HAR<=5Gl6EY11g4>C1URq9`e!l#=O&o@vs_sheYmudJz!lLHPb|9mVpVTz zj^?6-JS|*3cJuh}jx+K{LNx8;+DSG-vcl*XY!u{3L+R5b(p2maLPBOMpd-?O=Q$SH zCXP=3JW?uJ2imikIxIllmpc^0PK$w?l-;ue!uP_wz%vchRM~b&FyLvLK3DUW4T?1@ zp*QLeIUkX6;QTk~M>>eZd1pL%AX*>`@$bNv0%fHo4;1Kt{8R1pCn?zQs~6x}ZT%|O zNQqTPZjl#ml81ECf^Nv4l;H7(C+yVnTt>8=iDh)b+Df z@Qv7$zT7`?p{YUC{Nv4UMc-KD(C+0qL zmVWku8wo1ko?)Dc=r*;kMK%tvty@!l8x`~X^s{5X3chb}e%g1%#wOtIx{9GF4dq9sfH&Qk zBq=>pWSY*tMNO2s(5D zm(rJ=KYqsYGrNNNC4v4+8nYPQR>;3eI$}k9?-iMma9R$vd`WHG3E!s<4)p#QhCDf2 zr#GhMTsuX5eseUyO@AHr^cWO;rSRzL3Ygy9rOf$}l3qPHQwoXB{rjppKb<>x7CDOU z*Ne^Do8)WG7-4ej3M{TSdA6@ta4U`2Y~7&59M;x3k?;VFq7KbN`3#?R?mQJo!E{Zs zB3=c?Nyd|%h}rb&M%u__CN$${G0P~}tt;TmTY|`#y-x5$0&b|QD4Auclr=b&HVl?K zJY{HlD#g9Qt!!~QJpx!2AzhaGiwG|ZpGi;6N%YLGgAcn7w&In1KQH>_;Qg3KuD&1f z%b~d#2HiahP-`xlAW_C{LeHaho(`-qj5UEh4@V!}vfETJKX(_$#*hQa$edeEXQntY ynagYJBB}U3I3rMYSx;nF*F-TpmDjr2$EYKc(1*a^9f!{Dgnz$X#0?Pg9sD1nGyPcr diff --git a/engine/__pycache__/watcher.cpython-314.pyc b/engine/__pycache__/watcher.cpython-314.pyc index 71624369e18ae0467a9d2abda991964134b7d999..7679849a3a0268385d6a6581c65d119d793c8c17 100644 GIT binary patch delta 47 zcmcbmc36#9n~#@^0SLBixtf`_kvEV@Sl`7eCZH%kE43uQI3}R7BqKj>a|hEU0RUIc B57Phu delta 70 zcmX@Cc1w*{n~#@^0SG3|yP9dRkvEV@%PrX|CbT%Us5mA)F)=A6#y>CBr6{v3HO3`B YximL5uOz=X#yzvdqcmxAA=4!R02k93SO5S3 diff --git a/engine/search.py b/engine/search.py index ebf036a..5a10203 100644 --- a/engine/search.py +++ b/engine/search.py @@ -12,6 +12,8 @@ from rank_bm25 import BM25Okapi from core.config import GlobalContextMode +from core.config import Settings +from engine.database import fetch_db_chunks log = logging.getLogger(f"kernelbots.{__name__}") @@ -26,10 +28,12 @@ def __init__( content_dir: Path, score_threshold: float, global_context_mode: GlobalContextMode = "geral", + settings: Settings | None = None, # <-- adicionar ) -> None: self._content_dir = content_dir.resolve() self._score_threshold = score_threshold self._global_context_mode: GlobalContextMode = global_context_mode + self._settings = settings self._lock = threading.RLock() self._silos: dict[str, dict[str, Any]] = {} self._discipline_ids: frozenset[str] = frozenset() @@ -151,6 +155,15 @@ def rebuild(self) -> None: log.warning( "⚠ Nenhum .md indexado — BM25 desativado. Modo assistente geral ativo." ) + + # --- chunks do MySQL (silo "db") --- + db_chunks: list[dict] = [] + if self._settings is not None: + db_chunks = fetch_db_chunks(self._settings) + if db_chunks: + tokenized_db = [self._tokenize(c["text"]) for c in db_chunks] + new_silos["db"] = {"chunks": db_chunks, "bm25": BM25Okapi(tokenized_db)} + all_chunks.extend(db_chunks) elapsed = (time.perf_counter() - t0) * 1000 with self._lock: @@ -158,11 +171,11 @@ def rebuild(self) -> None: self._silos = new_silos self._all_chunks = all_chunks + db_count = len(db_chunks) + md_count = len(all_chunks) - db_count log.info( - "✅ Índice BM25 por silo pronto — %s chunk(s) | %s silo(s) | rebuild em %.1fms", - len(all_chunks), - len(new_silos), - elapsed, + "✅ Índice BM25 por silo pronto — %s chunk(s) (%s .md + %s MySQL) | %s silo(s) | rebuild em %.1fms", + len(all_chunks), md_count, db_count, len(new_silos), elapsed, ) def normalize_discipline(self, raw: str | None) -> str | None: @@ -225,7 +238,10 @@ def search( return self._hits_in_silo(nd, query, top_k) if self._global_context_mode == "geral": - return self._hits_in_silo("geral", query, top_k) + hits = self._hits_in_silo("geral", query, top_k) + hits += self._hits_in_silo("db", query, top_k) + hits.sort(key=lambda h: h["score"], reverse=True) + return hits[:top_k] merged: list[dict] = [] for silo in sorted(self._silos.keys()): diff --git a/main.py b/main.py index a8efe52..b45637f 100644 --- a/main.py +++ b/main.py @@ -24,6 +24,7 @@ settings.content_dir, settings.bm25_score_threshold, settings.global_context_mode, + settings=settings, ) observer = start_content_observer(search_engine, settings.content_dir) diff --git a/requirements.txt b/requirements.txt index f93aa6e..bbba313 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ python-dotenv jinja2 rank-bm25 watchdog -pytest \ No newline at end of file +pytest +PyMySQL \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 9e63d6c..f7c13e9 100644 --- a/templates/index.html +++ b/templates/index.html @@ -32,6 +32,7 @@ content/doc), /python, /visualizacao-sql, /projeto-bloco, /planejamento-curso-carreira (RAG só na disciplina).

+ /reload /python o que são listas? /visualizacao-sql explique GROUP BY /projeto-bloco resuma o pipeline @@ -48,6 +49,7 @@ Enter envia · Shift+Enter nova linha · /python · /visualizacao-sql · /projeto-bloco · /planejamento-curso-carreira · /doc · /content + · /reload para reconstruir o índice