From fdee07f1f65ef811b49e61fd7397f110349ce8ec Mon Sep 17 00:00:00 2001
From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com>
Date: Wed, 6 Dec 2023 18:17:20 +0100
Subject: [PATCH 1/6] Change type of PromptModel invocation_layer_class init
 param (#6497)

---
 test/prompt/test_prompt_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/prompt/test_prompt_model.py b/test/prompt/test_prompt_model.py
index 5406172f63..78ca51f547 100644
--- a/test/prompt/test_prompt_model.py
+++ b/test/prompt/test_prompt_model.py
@@ -40,7 +40,7 @@ def test_constructor_with_no_supported_model():
 
 
 @pytest.mark.unit
-def test_constructor_with_invocation_layer_class_string(mock_auto_tokenizer):
+def test_constructor_with_invocation_layer_class_string():
     model = PromptModel(
         invocation_layer_class="haystack.nodes.prompt.invocation_layer.CohereInvocationLayer", api_key="fake_api_key"
     )

From c514720ccc9b547275c27f11d9ce837005406ef3 Mon Sep 17 00:00:00 2001
From: Julian Risch <julian.risch@deepset.ai>
Date: Tue, 12 Dec 2023 12:41:06 +0100
Subject: [PATCH 2/6] fix: mypy `"str" not callable` for
 `PromptModelInvocationLayer` (#6529)

* cast to PromptModelInvocationLayer

* fix pylint pointless-exception-statement

* use two variables to avoid re-assignment

* black

* use mocked tokenizer in unit test
---
 test/prompt/test_prompt_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/prompt/test_prompt_model.py b/test/prompt/test_prompt_model.py
index 78ca51f547..5406172f63 100644
--- a/test/prompt/test_prompt_model.py
+++ b/test/prompt/test_prompt_model.py
@@ -40,7 +40,7 @@ def test_constructor_with_no_supported_model():
 
 
 @pytest.mark.unit
-def test_constructor_with_invocation_layer_class_string():
+def test_constructor_with_invocation_layer_class_string(mock_auto_tokenizer):
     model = PromptModel(
         invocation_layer_class="haystack.nodes.prompt.invocation_layer.CohereInvocationLayer", api_key="fake_api_key"
     )

From 74d8f9508dcb3c1618fb270d64dd2e78a4fd3577 Mon Sep 17 00:00:00 2001
From: robpasternak <pasternakrs@gmail.com>
Date: Fri, 11 Aug 2023 12:01:52 +0200
Subject: [PATCH 3/6] Add normalization and weighting for `JoinDocuments`
 reciprocal rank fusion

---
 haystack/nodes/other/join_docs.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/haystack/nodes/other/join_docs.py b/haystack/nodes/other/join_docs.py
index 274e90a38d..ae4dd9db8d 100644
--- a/haystack/nodes/other/join_docs.py
+++ b/haystack/nodes/other/join_docs.py
@@ -155,8 +155,13 @@ def _calculate_rrf(self, results):
         K = 61
 
         scores_map = defaultdict(int)
-        for result in results:
+        weights = self.weights if self.weights else [1 / len(results)] * len(results)
+        for result, weight in zip(results, weights):
             for rank, doc in enumerate(result):
-                scores_map[doc.id] += 1 / (K + rank)
+                scores_map[doc.id] += (weight * len(results)) / (K + rank)
+
+        # Normalize scores
+        for id in scores_map:
+            scores_map[id] = scores_map[id] / (len(results) / K)
 
         return scores_map

From 78a6f6b163478d4ab66f341cdee6711040c548c5 Mon Sep 17 00:00:00 2001
From: robpasternak <pasternakrs@gmail.com>
Date: Fri, 1 Sep 2023 16:52:16 +0200
Subject: [PATCH 4/6] Add weights and score normalization for reciprocal rank
 fusion in JoinDocuments node.

---
 haystack/nodes/other/join_docs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/haystack/nodes/other/join_docs.py b/haystack/nodes/other/join_docs.py
index ae4dd9db8d..b94d7d14e5 100644
--- a/haystack/nodes/other/join_docs.py
+++ b/haystack/nodes/other/join_docs.py
@@ -156,11 +156,14 @@ def _calculate_rrf(self, results):
 
         scores_map = defaultdict(int)
         weights = self.weights if self.weights else [1 / len(results)] * len(results)
+
+        # Calculate weighted reciprocal rank fusion score
         for result, weight in zip(results, weights):
             for rank, doc in enumerate(result):
                 scores_map[doc.id] += (weight * len(results)) / (K + rank)
 
-        # Normalize scores
+        # Normalize scores. Note: len(results) / K is the maximum possible score (assuming the weights
+        # sum to 1), achieved by being ranked first in all results with non-zero weight.
         for id in scores_map:
             scores_map[id] = scores_map[id] / (len(results) / K)
 

From fe07486e61c51aaca8aad3227fbe3e2994225180 Mon Sep 17 00:00:00 2001
From: robpasternak <pasternakrs@gmail.com>
Date: Fri, 15 Dec 2023 16:39:57 +0100
Subject: [PATCH 5/6] Fix black-jupyter

---
 ...n-docs-weighting-rrf-c52ba00a25004fd4.yaml |  6 ++++
 test/nodes/test_join_documents.py             | 32 +++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 releasenotes/notes/join-docs-weighting-rrf-c52ba00a25004fd4.yaml

diff --git a/releasenotes/notes/join-docs-weighting-rrf-c52ba00a25004fd4.yaml b/releasenotes/notes/join-docs-weighting-rrf-c52ba00a25004fd4.yaml
new file mode 100644
index 0000000000..23a31d911e
--- /dev/null
+++ b/releasenotes/notes/join-docs-weighting-rrf-c52ba00a25004fd4.yaml
@@ -0,0 +1,6 @@
+---
+enhancements:
+  - |
+    Make `JoinDocuments` respect the `weights` parameter when `join_mode`
+    is `reciprocal_rank_fusion`, and normalize the resulting scores by the
+    maximum possible score in that mode.
diff --git a/test/nodes/test_join_documents.py b/test/nodes/test_join_documents.py
index 463aeaa577..246c107894 100644
--- a/test/nodes/test_join_documents.py
+++ b/test/nodes/test_join_documents.py
@@ -113,3 +113,35 @@ def test_joindocuments_concatenate_duplicate_docs_null_score():
     result, _ = join_docs.run(inputs)
     assert len(result["documents"]) == 3
     assert result["documents"] == expected_outputs["documents"]
+
+
+@pytest.mark.unit
+def test_joindocuments_rrf_weights():
+    """
+    Test that the reciprocal rank fusion method correctly handles weights.
+    """
+    inputs = [
+        {
+            "documents": [
+                Document(content="text document 1", content_type="text", score=0.2),
+                Document(content="text document 2", content_type="text", score=0.3),
+            ]
+        },
+        {
+            "documents": [
+                Document(content="text document 3", content_type="text", score=0.7),
+                Document(content="text document 4", content_type="text", score=None),
+            ]
+        },
+    ]
+
+    join_docs_none = JoinDocuments(join_mode="reciprocal_rank_fusion")
+    result_none, _ = join_docs_none.run(inputs)
+    join_docs_even = JoinDocuments(join_mode="reciprocal_rank_fusion", weights=[0.5, 0.5])
+    result_even, _ = join_docs_even.run(inputs)
+    join_docs_uneven = JoinDocuments(join_mode="reciprocal_rank_fusion", weights=[0.7, 0.3])
+    result_uneven, _ = join_docs_uneven.run(inputs)
+
+    assert result_none["documents"] == result_even["documents"]
+    assert result_uneven["documents"] != result_none["documents"]
+    assert result_uneven["documents"][0].score > result_none["documents"][0].score

From 6d64d2c86c623ccd02270e23733c9c8475c80c9a Mon Sep 17 00:00:00 2001
From: robpasternak <pasternakrs@gmail.com>
Date: Wed, 20 Dec 2023 17:29:21 +0100
Subject: [PATCH 6/6] Fix JoinDocuments test for rrf + score normalization

---
 test/nodes/test_join_documents.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/test/nodes/test_join_documents.py b/test/nodes/test_join_documents.py
index 246c107894..ae809f4994 100644
--- a/test/nodes/test_join_documents.py
+++ b/test/nodes/test_join_documents.py
@@ -3,6 +3,7 @@
 
 from haystack import Document
 from haystack.nodes.other.join_docs import JoinDocuments
+from copy import deepcopy
 
 
 @pytest.mark.unit
@@ -120,7 +121,7 @@ def test_joindocuments_rrf_weights():
     """
     Test that the reciprocal rank fusion method correctly handles weights.
     """
-    inputs = [
+    inputs_none = [
         {
             "documents": [
                 Document(content="text document 1", content_type="text", score=0.2),
@@ -135,12 +136,15 @@ def test_joindocuments_rrf_weights():
         },
     ]
 
+    inputs_even = deepcopy(inputs_none)
+    inputs_uneven = deepcopy(inputs_none)
+
     join_docs_none = JoinDocuments(join_mode="reciprocal_rank_fusion")
-    result_none, _ = join_docs_none.run(inputs)
+    result_none, _ = join_docs_none.run(inputs_none)
     join_docs_even = JoinDocuments(join_mode="reciprocal_rank_fusion", weights=[0.5, 0.5])
-    result_even, _ = join_docs_even.run(inputs)
+    result_even, _ = join_docs_even.run(inputs_even)
     join_docs_uneven = JoinDocuments(join_mode="reciprocal_rank_fusion", weights=[0.7, 0.3])
-    result_uneven, _ = join_docs_uneven.run(inputs)
+    result_uneven, _ = join_docs_uneven.run(inputs_uneven)
 
     assert result_none["documents"] == result_even["documents"]
     assert result_uneven["documents"] != result_none["documents"]