-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic_usage.py
More file actions
81 lines (64 loc) · 2.35 KB
/
basic_usage.py
File metadata and controls
81 lines (64 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Basic MicroRAG usage example.

This example demonstrates the core workflow:

- Creating a configuration
- Adding documents in various formats
- Building the index
- Searching for documents
- Managing documents (count, get, clear)

Before running, set MODEL_PATH to your sentence-transformer model directory.
"""
from microrag import RAGConfig, MicroRAG
from microrag.models import Document
# Path to a local sentence-transformer model directory; main() passes it to
# RAGConfig as model_path. The value below is only a placeholder -- replace it
# with a real path before running this example.
MODEL_PATH = "/path/to/your/model"
def _preview(text: str, limit: int) -> str:
    """Return *text* limited to *limit* characters for display.

    The "..." marker is appended only when characters were actually dropped,
    so strings that already fit are shown verbatim instead of looking
    truncated.
    """
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def main() -> None:
    """Walk through the basic MicroRAG workflow and print each step's output.

    Steps: build a RAGConfig, add documents in three input formats, build the
    index, run one search, then count, fetch and clear the stored documents.
    """
    # Create configuration with in-memory database
    config = RAGConfig(
        model_path=MODEL_PATH,
        db_path=":memory:",
    )

    # Use context manager for automatic resource cleanup
    with MicroRAG(config) as rag:
        # Add documents - supports multiple formats

        # Format 1: Plain strings
        rag.add_documents([
            "Python is a high-level programming language.",
            "Machine learning enables computers to learn from data.",
        ])

        # Format 2: Dictionaries with metadata
        rag.add_documents([
            {
                "content": "DuckDB is an in-process analytical database.",
                "metadata": {"topic": "databases", "year": 2019},
            },
        ])

        # Format 3: Document objects (explicit id, reused by get_document below)
        rag.add_documents([
            Document(
                id="doc-vectors",
                content="Vector databases store embeddings for similarity search.",
                metadata={"topic": "databases"},
            ),
        ])

        # Build indexes (required before searching)
        print("Building index...")
        rag.build_index()

        # Search for documents
        print("\nSearching for 'database':")
        results = rag.search("database", top_k=3)
        for result in results:
            print(f" [{result.rank}] Score: {result.score:.3f}")
            # Only mark the content as truncated when it really exceeds 60 chars.
            print(f" Content: {_preview(result.content, 60)}")
            if result.metadata:
                print(f" Metadata: {result.metadata}")

        # Document management
        print(f"\nTotal documents: {rag.count()}")

        # Get specific document by ID
        doc = rag.get_document("doc-vectors")
        if doc:
            print(f"Retrieved doc: {_preview(doc.content, 50)}")

        # Clear all documents
        rag.clear()
        print(f"After clear: {rag.count()} documents")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()