From 11b7cf0c554b6b6e4b5af53f382fea85a0cdf4aa Mon Sep 17 00:00:00 2001 From: x110 Date: Mon, 20 Nov 2023 10:38:14 +0400 Subject: [PATCH 1/9] added pods and pod_type arg to PineconeDocumentStore --- haystack/document_stores/pinecone.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/haystack/document_stores/pinecone.py b/haystack/document_stores/pinecone.py index 33da45f1f2..67fb975577 100644 --- a/haystack/document_stores/pinecone.py +++ b/haystack/document_stores/pinecone.py @@ -79,6 +79,8 @@ def __init__( environment: str = "us-west1-gcp", pinecone_index: Optional["pinecone.Index"] = None, embedding_dim: int = 768, + pods: Optional[int] = None, + pod_type: Optional[str] = None, return_embedding: bool = False, index: str = "document", similarity: str = "cosine", @@ -98,6 +100,8 @@ def __init__( regions are supported, contact Pinecone [here](https://www.pinecone.io/contact/) if required. :param pinecone_index: pinecone-client Index object, an index will be initialized or loaded if not specified. :param embedding_dim: The embedding vector size. + :param pods: The number of pods for the index to use,including replicas. + :param pod_type: The type of pod to use. :param return_embedding: Whether to return document embeddings. :param index: Name of index in document store to use. :param similarity: The similarity function used to compare document vectors. 
`"cosine"` is the default @@ -151,6 +155,8 @@ def __init__( self.duplicate_documents = duplicate_documents # Pinecone index params + self.pods = pods + self.pod_type = pod_type self.replicas = replicas self.shards = shards self.namespace = namespace @@ -182,6 +188,8 @@ def __init__( else: self.pinecone_indexes[self.index] = self._create_index( embedding_dim=self.embedding_dim, + pods=self.pods, + pod_type=self.pod_type, index=self.index, metric_type=self.metric_type, replicas=self.replicas, @@ -199,6 +207,8 @@ def _index(self, index) -> str: def _create_index( self, embedding_dim: int, + pods: Optional[int] = None, + pod_type: Optional[str] = None, index: Optional[str] = None, metric_type: Optional[str] = "cosine", replicas: Optional[int] = 1, @@ -225,6 +235,8 @@ def _create_index( pinecone.create_index( name=index, dimension=embedding_dim, + pods=pods, + pod_type=pod_type, metric=metric_type, replicas=replicas, shards=shards, @@ -254,6 +266,8 @@ def _index_connection_exists(self, index: str, create: bool = False) -> Optional if create: return self._create_index( embedding_dim=self.embedding_dim, + pods=self.pods, + pod_type=self.pod_type, index=index, metric_type=self.metric_type, replicas=self.replicas, From 7ac19d943898983dbb2d3c39566c27f91ad8c27e Mon Sep 17 00:00:00 2001 From: x110 Date: Mon, 20 Nov 2023 15:25:18 +0400 Subject: [PATCH 2/9] added a test --- test/others/test_pinecone.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 test/others/test_pinecone.py diff --git a/test/others/test_pinecone.py b/test/others/test_pinecone.py new file mode 100644 index 0000000000..cb595b36b5 --- /dev/null +++ b/test/others/test_pinecone.py @@ -0,0 +1,28 @@ +import os +from haystack.document_stores import PineconeDocumentStore +from dotenv import load_dotenv +import pinecone + +load_dotenv() + + +def test_document_store_properties(): + pods = 1 + pod_type = "starter" + environment = "gcp-starter" + index_name = "docs" + + 
document_store = PineconeDocumentStore( + api_key=os.getenv("PINECONE_API_KEY"), + environment=environment, + pods=pods, + pod_type=pod_type, + similarity="cosine", + embedding_dim=768, + index=index_name, + ) + pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment=environment) + index_description = pinecone.describe_index(document_store.index) + assert index_description.pods == document_store.pods + assert index_description.pod_type == document_store.pod_type + pinecone.delete_index(index_name) From 88fa10f6e70f5318090aab5644bccd6d330dbf56 Mon Sep 17 00:00:00 2001 From: x110 Date: Mon, 20 Nov 2023 16:18:41 +0400 Subject: [PATCH 3/9] added releasenotes --- .../add-arg-to-PineconeDocumentStore-984add063663e70b.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 releasenotes/notes/add-arg-to-PineconeDocumentStore-984add063663e70b.yaml diff --git a/releasenotes/notes/add-arg-to-PineconeDocumentStore-984add063663e70b.yaml b/releasenotes/notes/add-arg-to-PineconeDocumentStore-984add063663e70b.yaml new file mode 100644 index 0000000000..75629e23f7 --- /dev/null +++ b/releasenotes/notes/add-arg-to-PineconeDocumentStore-984add063663e70b.yaml @@ -0,0 +1,4 @@ +--- +enhancements: + - | + Users can now define the number of pods and pod type directly when creating a PineconeDocumentStore instance. 
From 7762eed27fa600658d55b14c38e0ff5d7e6a0af8 Mon Sep 17 00:00:00 2001 From: x110 Date: Wed, 22 Nov 2023 15:46:32 +0400 Subject: [PATCH 4/9] added pod, pod_type to mock pinecone index and related methods --- test/mocks/pinecone.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/mocks/pinecone.py b/test/mocks/pinecone.py index e25255f2c2..c1a3d80cc3 100644 --- a/test/mocks/pinecone.py +++ b/test/mocks/pinecone.py @@ -45,6 +45,8 @@ def __init__( api_key: Optional[str] = None, environment: Optional[str] = None, dimension: Optional[int] = None, + pods: Optional[int] = None, + pod_type: Optional[str] = None, metric: Optional[str] = None, replicas: Optional[int] = None, shards: Optional[int] = None, @@ -55,6 +57,8 @@ def __init__( self.environment = environment self.dimension = dimension self.metric = metric + self.pods = pods + self.pod_type = pod_type self.replicas = replicas self.shards = shards self.metadata_config = metadata_config @@ -338,6 +342,8 @@ def create_index( dimension: int, metric: str = "cosine", replicas: int = 1, + pods: int = 1, + pod_type: str = "p1", shards: int = 1, metadata_config: Optional[dict] = None, ): @@ -348,6 +354,8 @@ def create_index( dimension=dimension, metric=metric, replicas=replicas, + pods=pods, + pod_type=pod_type, shards=shards, metadata_config=metadata_config, ) From eb7279850a9b7cf0bfc0af12ab1fb4616574a8b2 Mon Sep 17 00:00:00 2001 From: x110 Date: Wed, 22 Nov 2023 16:01:10 +0400 Subject: [PATCH 5/9] removing test file from others --- test/others/test_pinecone.py | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 test/others/test_pinecone.py diff --git a/test/others/test_pinecone.py b/test/others/test_pinecone.py deleted file mode 100644 index cb595b36b5..0000000000 --- a/test/others/test_pinecone.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from haystack.document_stores import PineconeDocumentStore -from dotenv import load_dotenv -import pinecone - -load_dotenv() - - -def 
test_document_store_properties(): - pods = 1 - pod_type = "starter" - environment = "gcp-starter" - index_name = "docs" - - document_store = PineconeDocumentStore( - api_key=os.getenv("PINECONE_API_KEY"), - environment=environment, - pods=pods, - pod_type=pod_type, - similarity="cosine", - embedding_dim=768, - index=index_name, - ) - pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment=environment) - index_description = pinecone.describe_index(document_store.index) - assert index_description.pods == document_store.pods - assert index_description.pod_type == document_store.pod_type - pinecone.delete_index(index_name) From 2a134164d618d3ba48397d6d303dcc012fdfdb96 Mon Sep 17 00:00:00 2001 From: x110 Date: Fri, 24 Nov 2023 09:34:30 +0400 Subject: [PATCH 6/9] added integration test --- test/document_stores/test_pinecone.py | 24 ++++++++++++++++++++++++ test/mocks/pinecone.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/test/document_stores/test_pinecone.py b/test/document_stores/test_pinecone.py index 6bacb9e48b..ea377a9170 100644 --- a/test/document_stores/test_pinecone.py +++ b/test/document_stores/test_pinecone.py @@ -38,11 +38,15 @@ def ds(self, monkeypatch, request) -> PineconeDocumentStore: monkeypatch.setattr(f"pinecone.{fname}", function, raising=False) for cname, class_ in getmembers(pinecone_mock, isclass): monkeypatch.setattr(f"pinecone.{cname}", class_, raising=False) + pods = request.param.get("pods", None) + pod_type = request.param.get("pod_type", None) return PineconeDocumentStore( api_key=os.environ.get("PINECONE_API_KEY") or "fake-pinecone-test-key", embedding_dim=768, embedding_field="embedding", + pods=pods, + pod_type=pod_type, index="haystack_tests", similarity="cosine", recreate_index=True, @@ -733,3 +737,23 @@ def test_split_overlap_meta(self, mocked_ds): } retrieved_docs = mocked_ds.get_all_documents() assert retrieved_docs[0].meta["_split_overlap"] == [{"doc_id": "test_id", "range": [0, 10]}] + + 
@pytest.mark.parametrize("ds", [{"pods": 1, "pod_type": "p1.x1"}, {"pods": 2, "pod_type": "p2.x2"}], indirect=True) + @pytest.mark.integration + def test_document_store_properties(self, monkeypatch, ds): + mock_description = pinecone_mock.IndexDescription( + name=ds.index, + metric="dotproduct", + replicas=1, + dimension=768.0, + shards=1, + pods=ds.pods, + pod_type=ds.pod_type, + status={"ready": True, "state": "Ready"}, + metadata_config=None, + source_collection="", + ) + monkeypatch.setattr(pinecone, "describe_index", MagicMock(return_value=mock_description)) + index_description = pinecone.describe_index(ds.index) + assert index_description.pods == ds.pods + assert index_description.pod_type == ds.pod_type diff --git a/test/mocks/pinecone.py b/test/mocks/pinecone.py index c1a3d80cc3..b74dd459fe 100644 --- a/test/mocks/pinecone.py +++ b/test/mocks/pinecone.py @@ -343,7 +343,7 @@ def create_index( metric: str = "cosine", replicas: int = 1, pods: int = 1, - pod_type: str = "p1", + pod_type: str = "p1.x1", shards: int = 1, metadata_config: Optional[dict] = None, ): From 8a170196b613148e2ccc69b115044105f65d833d Mon Sep 17 00:00:00 2001 From: x110 Date: Fri, 24 Nov 2023 09:56:11 +0400 Subject: [PATCH 7/9] fixes --- test/document_stores/test_pinecone.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/document_stores/test_pinecone.py b/test/document_stores/test_pinecone.py index ea377a9170..f32c479c5a 100644 --- a/test/document_stores/test_pinecone.py +++ b/test/document_stores/test_pinecone.py @@ -38,8 +38,9 @@ def ds(self, monkeypatch, request) -> PineconeDocumentStore: monkeypatch.setattr(f"pinecone.{fname}", function, raising=False) for cname, class_ in getmembers(pinecone_mock, isclass): monkeypatch.setattr(f"pinecone.{cname}", class_, raising=False) - pods = request.param.get("pods", None) - pod_type = request.param.get("pod_type", None) + params = getattr(request, "param", {}) + pods = params.get("pods", None) + pod_type = 
params.get("pod_type", None) return PineconeDocumentStore( api_key=os.environ.get("PINECONE_API_KEY") or "fake-pinecone-test-key", From 3425fad9a294f7bb75526ddd0ce74b01e213683b Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 4 Dec 2023 18:16:20 +0100 Subject: [PATCH 8/9] set default values for new parameters --- haystack/document_stores/pinecone.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/haystack/document_stores/pinecone.py b/haystack/document_stores/pinecone.py index 67fb975577..243ff19a9b 100644 --- a/haystack/document_stores/pinecone.py +++ b/haystack/document_stores/pinecone.py @@ -79,8 +79,8 @@ def __init__( environment: str = "us-west1-gcp", pinecone_index: Optional["pinecone.Index"] = None, embedding_dim: int = 768, - pods: Optional[int] = None, - pod_type: Optional[str] = None, + pods: int = 1, + pod_type: str = "p1.x1", return_embedding: bool = False, index: str = "document", similarity: str = "cosine", @@ -100,8 +100,8 @@ def __init__( regions are supported, contact Pinecone [here](https://www.pinecone.io/contact/) if required. :param pinecone_index: pinecone-client Index object, an index will be initialized or loaded if not specified. :param embedding_dim: The embedding vector size. - :param pods: The number of pods for the index to use,including replicas. - :param pod_type: The type of pod to use. + :param pods: The number of pods for the index to use, including replicas. Defaults to 1. + :param pod_type: The type of pod to use. Defaults to `"p1.x1"`. :param return_embedding: Whether to return document embeddings. :param index: Name of index in document store to use. :param similarity: The similarity function used to compare document vectors. 
`"cosine"` is the default @@ -207,8 +207,8 @@ def _index(self, index) -> str: def _create_index( self, embedding_dim: int, - pods: Optional[int] = None, - pod_type: Optional[str] = None, + pods: int = 1, + pod_type: str = "p1.x1", index: Optional[str] = None, metric_type: Optional[str] = "cosine", replicas: Optional[int] = 1, From fc70d82076888852139626a2525e932466e5f285 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 4 Dec 2023 18:17:55 +0100 Subject: [PATCH 9/9] rm test that needs a pro plan --- test/document_stores/test_pinecone.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/test/document_stores/test_pinecone.py b/test/document_stores/test_pinecone.py index f32c479c5a..7d92aef3e4 100644 --- a/test/document_stores/test_pinecone.py +++ b/test/document_stores/test_pinecone.py @@ -738,23 +738,3 @@ def test_split_overlap_meta(self, mocked_ds): } retrieved_docs = mocked_ds.get_all_documents() assert retrieved_docs[0].meta["_split_overlap"] == [{"doc_id": "test_id", "range": [0, 10]}] - - @pytest.mark.parametrize("ds", [{"pods": 1, "pod_type": "p1.x1"}, {"pods": 2, "pod_type": "p2.x2"}], indirect=True) - @pytest.mark.integration - def test_document_store_properties(self, monkeypatch, ds): - mock_description = pinecone_mock.IndexDescription( - name=ds.index, - metric="dotproduct", - replicas=1, - dimension=768.0, - shards=1, - pods=ds.pods, - pod_type=ds.pod_type, - status={"ready": True, "state": "Ready"}, - metadata_config=None, - source_collection="", - ) - monkeypatch.setattr(pinecone, "describe_index", MagicMock(return_value=mock_description)) - index_description = pinecone.describe_index(ds.index) - assert index_description.pods == ds.pods - assert index_description.pod_type == ds.pod_type