From 9260773c14993beb533ce78b10180a2bca4869e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=98=8E=E7=A5=AF?= Date: Thu, 14 Aug 2025 19:08:49 +0800 Subject: [PATCH 1/3] fix: without ids --- rag_factory/Retrieval/Retriever/Retriever_BM25.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rag_factory/Retrieval/Retriever/Retriever_BM25.py b/rag_factory/Retrieval/Retriever/Retriever_BM25.py index 98eef3e..9749166 100644 --- a/rag_factory/Retrieval/Retriever/Retriever_BM25.py +++ b/rag_factory/Retrieval/Retriever/Retriever_BM25.py @@ -5,7 +5,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence from dataclasses import dataclass, field - +import uuid from pydantic import ConfigDict, Field, model_validator logger = logging.getLogger(__name__) @@ -207,7 +207,7 @@ def from_texts( f"与 texts 长度 ({len(texts_list)}) 不匹配" ) else: - ids_list = [None for _ in texts_list] + ids_list = [str(uuid.uuid4()) for _ in texts_list] # 预处理文本 logger.info(f"正在预处理 {len(texts_list)} 个文本...") From 877d880cc2028eb2213129013baca8f7ced818c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=98=8E=E7=A5=AF?= Date: Fri, 15 Aug 2025 15:16:52 +0800 Subject: [PATCH 2/3] remove api key --- examples/TCL_rag/config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/TCL_rag/config.yaml b/examples/TCL_rag/config.yaml index 231b3a8..9551aff 100644 --- a/examples/TCL_rag/config.yaml +++ b/examples/TCL_rag/config.yaml @@ -1,12 +1,12 @@ llm: name: openai - base_url: "https://api.gptsapi.net/v1" - api_key: "sk-2T06b7c7f9c3870049fbf8fada596b0f8ef908d1e233KLY2" + base_url: "xxx" + api_key: "xxx" model: "gpt-4.1-mini" embedding: name: huggingface - model_name: "/finance_ML/dataarc_syn_database/model/Qwen/qwen_embedding_0.6B" + model_name: "xxx" model_kwargs: device: "cuda:0" @@ -14,20 +14,20 @@ embedding: store: name: faiss - folder_path: /data/FinAi_Mapping_Knowledge/chenmingzhen/test_faiss_store + folder_path: xxx bm25: name: bm25 k: 10 - data_path: /data/FinAi_Mapping_Knowledge/chenmingzhen/tog3_backend/TCL/syn_table_data/data_all_clearn_short_chunk_with_caption_desc.json + data_path: xxx retriever: name: vectorstore reranker: name: qwen3 - model_name_or_path: "/finance_ML/dataarc_syn_database/model/Qwen/qwen_reranker_0.6B" + model_name_or_path: "xxx" device_id: "cuda:0" dataset: From 71d9cf9cac5463cbd339918484f9f97970e063d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=98=8E=E7=A5=AF?= Date: Fri, 15 Aug 2025 15:17:59 +0800 Subject: [PATCH 3/3] chore --- examples/TCL_rag/test.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/TCL_rag/test.py b/examples/TCL_rag/test.py index 9607f3f..34efb74 100644 --- a/examples/TCL_rag/test.py +++ b/examples/TCL_rag/test.py @@ -24,9 +24,8 @@ vector_store_config=vector_store_config, bm25_retriever_config=bm25_retriever_config) - result = rag.invoke("毛细管设计规范按照什么标准",k=20) + result = rag.invoke("模块机传感器端子不防呆的改善方案是什么?由哪个部门负责?",k=20) - answer = rag.answer("毛细管设计规范按照什么标准",result) - - - print(answer) \ No newline at end of file + for i in result: + print(i) + print("-"*100) \ No newline at end of file