1+ import os
2+
3+ from langchain .vectorstores .neo4j_vector import Neo4jVector
4+ from langchain .document_loaders import WikipediaLoader
5+ from langchain .embeddings .openai import OpenAIEmbeddings
6+ from langchain .text_splitter import CharacterTextSplitter
7+ from langchain .docstore .document import Document
8+ from dotenv import load_dotenv
9+
# Load connection & runtime configuration from the environment (.env supported).
load_dotenv('.env')

url = os.getenv('NEO4J_URI')
username = os.getenv('NEO4J_USERNAME')
password = os.getenv('NEO4J_PASSWORD')
page = os.getenv('WIKIPEDIA_PAGE') or "Sweden"
prompt = os.getenv('PROMPT') or "What is the second largest city in Sweden?"

# Fail fast with a clear message: os.environ values must be strings, so an
# unset NEO4J_URI would otherwise surface as an opaque TypeError on the next line.
if not url:
    raise ValueError("NEO4J_URI environment variable is not set")

# NOTE(review): mirrors the value under NEO4J_URL as the original did —
# presumably some integration reads that name; confirm before removing.
os.environ["NEO4J_URL"] = url

embeddings = OpenAIEmbeddings()

# Read the wikipedia article
raw_documents = WikipediaLoader(query=page).load()

# Define chunking strategy
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=20
)
# Chunk the document
documents = text_splitter.split_documents(raw_documents)

# Remove the article summary from each chunk's metadata before storage.
# pop(..., None) tolerates chunks that have no "summary" key (del raised KeyError).
for d in documents:
    d.metadata.pop("summary", None)

# Create the vector index in Neo4j and ingest the chunked article.
neo4j_db = Neo4jVector.from_documents(
    documents,
    embedding=embeddings,
    url=url,
    username=username,
    password=password,
    database="neo4j",  # neo4j by default
    index_name="wikipedia",  # vector by default
    node_label="WikipediaArticle",  # Chunk by default
    text_node_property="info",  # text by default
    embedding_node_property="vector",  # embedding by default
    create_id_index=True,  # True by default
)

# Add one extra document with an explicit id so repeated runs upsert
# rather than duplicate it.
neo4j_db.add_documents(
    [
        Document(
            page_content="LangChain is the coolest library since the Library of Alexandria",
            metadata={"author": "Tomaz", "confidence": 1.0},
        )
    ],
    ids=["langchain"],
)

# Retrieve the single most similar chunk for the configured prompt.
result = neo4j_db.similarity_search(prompt, k=1)

print(result)