From ee208e0b03574e1397f7068c328e5e006d9004c1 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 12 Dec 2023 12:26:46 +0100 Subject: [PATCH 1/7] remove OpenAIAnswerGenerator --- haystack/nodes/__init__.py | 1 - haystack/nodes/answer_generator/__init__.py | 2 - haystack/nodes/answer_generator/base.py | 226 ------------- haystack/nodes/answer_generator/openai.py | 338 -------------------- test/nodes/test_generator.py | 208 ------------ test/pipelines/test_eval.py | 104 ------ 6 files changed, 879 deletions(-) delete mode 100644 haystack/nodes/answer_generator/__init__.py delete mode 100644 haystack/nodes/answer_generator/base.py delete mode 100644 haystack/nodes/answer_generator/openai.py delete mode 100644 test/nodes/test_generator.py diff --git a/haystack/nodes/__init__.py b/haystack/nodes/__init__.py index 9ac1f3268b..9ea7c18187 100644 --- a/haystack/nodes/__init__.py +++ b/haystack/nodes/__init__.py @@ -1,6 +1,5 @@ from haystack.nodes.base import BaseComponent -from haystack.nodes.answer_generator import BaseGenerator, OpenAIAnswerGenerator from haystack.nodes.document_classifier import BaseDocumentClassifier, TransformersDocumentClassifier from haystack.nodes.extractor import EntityExtractor, simplify_ner_for_qa from haystack.nodes.file_classifier import FileTypeClassifier diff --git a/haystack/nodes/answer_generator/__init__.py b/haystack/nodes/answer_generator/__init__.py deleted file mode 100644 index d4c7eeb558..0000000000 --- a/haystack/nodes/answer_generator/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from haystack.nodes.answer_generator.base import BaseGenerator -from haystack.nodes.answer_generator.openai import OpenAIAnswerGenerator diff --git a/haystack/nodes/answer_generator/base.py b/haystack/nodes/answer_generator/base.py deleted file mode 100644 index 3aedff4ac8..0000000000 --- a/haystack/nodes/answer_generator/base.py +++ /dev/null @@ -1,226 +0,0 @@ -from abc import abstractmethod -from typing import Any, List, Optional, Dict, Union - -from tqdm import tqdm - -from haystack.errors import HaystackError -from haystack.schema import Answer, Document, MultiLabel -from haystack.nodes.base import BaseComponent - - -class BaseGenerator(BaseComponent): - """ - Abstract class for Generators - """ - - outgoing_edges = 1 - - def __init__(self, progress_bar: bool = True): - super().__init__() - self.progress_bar = progress_bar - - @abstractmethod - def predict(self, query: str, documents: List[Document], top_k: Optional[int], max_tokens: Optional[int]) -> Dict: - """ - Abstract method to generate answers. - - :param query: Query string. - :param documents: Related documents (for example, coming from a retriever) the answer should be based on. - :param top_k: Number of returned answers. - :param max_tokens: The maximum number of tokens the generated answer can have. - :return: Generated answers plus additional infos in a dict. - """ - pass - - def run( # type: ignore - self, - query: str, - documents: List[Document], - top_k: Optional[int] = None, - labels: Optional[MultiLabel] = None, - add_isolated_node_eval: bool = False, - max_tokens: Optional[int] = None, - ): # type: ignore - """ - :param query: Query string. - :param documents: List of Documents the answer should be based on. - :param top_k: The maximum number of answers to return. - :param labels: Labels to be used for evaluation. - :param add_isolated_node_eval: If True, the answer generator will be evaluated in isolation. - :param max_tokens: The maximum number of tokens the generated answer can have. - """ - if documents: - results = self.predict(query=query, documents=documents, top_k=top_k, max_tokens=max_tokens) - else: - results = {"answers": []} - - # run evaluation with "perfect" labels as node inputs to calculate "upper bound" metrics for just this node - if add_isolated_node_eval and labels is not None: - relevant_documents = list({label.document.id: label.document for label in labels.labels}.values()) - results_label_input = self.predict( - query=query, documents=relevant_documents, top_k=top_k, max_tokens=max_tokens - ) - results["answers_isolated"] = results_label_input["answers"] - - return results, "output_1" - - def run_batch( # type: ignore - self, - queries: List[str], - documents: Union[List[Document], List[List[Document]]], - top_k: Optional[int] = None, - labels: Optional[List[MultiLabel]] = None, - batch_size: Optional[int] = None, - add_isolated_node_eval: bool = False, - max_tokens: Optional[int] = None, - ): - """ - :param queries: List of query strings. - :param documents: List of list of Documents the answer should be based on. - :param top_k: The maximum number of answers to return. - :param labels: Labels to be used for evaluation. - :param add_isolated_node_eval: If True, the answer generator will be evaluated in isolation. - :param max_tokens: The maximum number of tokens the generated answer can have. - """ - results = self.predict_batch( - queries=queries, documents=documents, top_k=top_k, batch_size=batch_size, max_tokens=max_tokens - ) - - # run evaluation with "perfect" labels as node inputs to calculate "upper bound" metrics for just this node - if add_isolated_node_eval and labels is not None: - relevant_documents = [] - for labelx in labels: - # Deduplicate same Documents in a MultiLabel based on their Document ID and filter out empty Documents - relevant_docs_labels = list( - { - label.document.id: label.document - for label in labelx.labels - if not isinstance(label.document.content, str) or label.document.content.strip() != "" - }.values() - ) - relevant_documents.append(relevant_docs_labels) - results_label_input = self.predict_batch(queries=queries, documents=relevant_documents, top_k=top_k) - - results["answers_isolated"] = results_label_input["answers"] - return results, "output_1" - - def _flatten_docs(self, documents: List[Document]): - flat_docs_dict: Dict[str, Any] = {} - for document in documents: - for k, v in document.to_dict().items(): - if k not in flat_docs_dict: - flat_docs_dict[k] = [] - flat_docs_dict[k].append(v) - return flat_docs_dict - - def _create_answers( - self, generated_answers: List[str], documents: List[Document], prompt: Optional[str] = None - ) -> List[Answer]: - flat_docs_dict = self._flatten_docs(documents) - answers: List[Any] = [] - for generated_answer in generated_answers: - answers.append( - Answer( - answer=generated_answer, - document_ids=flat_docs_dict.get("id"), - type="generative", - meta={ - "doc_scores": flat_docs_dict.get("score"), - "content": flat_docs_dict.get("content"), - "titles": [d.get("name", "") for d in flat_docs_dict.get("meta", [])], - "doc_metas": flat_docs_dict.get("meta"), - "prompt": prompt, - }, - ) - ) - return answers - - def predict_batch( - self, - queries: List[str], - documents: Union[List[Document], List[List[Document]]], - top_k: Optional[int] = None, - batch_size: Optional[int] = None, - max_tokens: Optional[int] = None, - ): - """ - Generate the answer to the input queries. The generation will be conditioned on the supplied documents. - These documents can for example be retrieved via the Retriever. - - - If you provide a list containing a single query... - - - ... and a single list of Documents, the query will be applied to each Document individually. - - ... and a list of lists of Documents, the query will be applied to each list of Documents and the Answers - will be aggregated per Document list. - - - If you provide a list of multiple queries... - - - ... and a single list of Documents, each query will be applied to each Document individually. - - ... and a list of lists of Documents, each query will be applied to its corresponding list of Documents - and the Answers will be aggregated per query-Document pair. - - :param queries: List of queries. - :param documents: Related documents (for example, coming from a retriever) the answer should be based on. - Can be a single list of Documents or a list of lists of Documents. - :param top_k: Number of returned answers per query. - :param batch_size: Not applicable. - :param max_tokens: The maximum number of tokens the generated answer can have. - :return: Generated answers plus additional infos in a dict like this: - - ```python - {'queries': 'who got the first nobel prize in physics', - 'answers': - [{'query': 'who got the first nobel prize in physics', - 'answer': ' albert einstein', - 'meta': { 'doc_ids': [...], - 'doc_scores': [80.42758 ...], - 'doc_probabilities': [40.71379089355469, ... - 'content': ['Albert Einstein was a ...] - 'titles': ['"Albert Einstein"', ...] - }}]} - ``` - """ - # TODO: This method currently just calls the predict method multiple times, so there is room for improvement. - - results: Dict = {"queries": queries, "answers": []} - - single_doc_list = False - # Docs case 1: single list of Documents -> apply each query to all Documents - if len(documents) > 0 and isinstance(documents[0], Document): - single_doc_list = True - pb = tqdm(total=len(queries) * len(documents), disable=not self.progress_bar, desc="Generating answers") - for query in queries: - for doc in documents: - if not isinstance(doc, Document): - raise HaystackError(f"doc was of type {type(doc)}, but expected a Document.") - preds = self.predict(query=query, documents=[doc], top_k=top_k, max_tokens=max_tokens) - results["answers"].append(preds["answers"]) - pb.update(1) - pb.close() - - # Docs case 2: list of lists of Documents -> apply each query to corresponding list of Documents, if queries - # contains only one query, apply it to each list of Documents - elif len(documents) > 0 and isinstance(documents[0], list): - if len(queries) == 1: - queries = queries * len(documents) - if len(queries) != len(documents): - raise HaystackError("Number of queries must be equal to number of provided Document lists.") - pb = tqdm(total=min(len(queries), len(documents)), disable=not self.progress_bar, desc="Generating answers") - for query, cur_docs in zip(queries, documents): - if not isinstance(cur_docs, list): - raise HaystackError(f"cur_docs was of type {type(cur_docs)}, but expected a list of Documents.") - preds = self.predict(query=query, documents=cur_docs, top_k=top_k, max_tokens=max_tokens) - results["answers"].append(preds["answers"]) - pb.update(1) - pb.close() - - # Group answers by question in case of multiple queries and single doc list - if single_doc_list and len(queries) > 1: - answers_per_query = int(len(results["answers"]) / len(queries)) - answers = [] - for i in range(0, len(results["answers"]), answers_per_query): - answer_group = results["answers"][i : i + answers_per_query] - answers.append(answer_group) - results["answers"] = answers - - return results diff --git a/haystack/nodes/answer_generator/openai.py b/haystack/nodes/answer_generator/openai.py deleted file mode 100644 index 68edd16b03..0000000000 --- a/haystack/nodes/answer_generator/openai.py +++ /dev/null @@ -1,338 +0,0 @@ -import logging -import os -from typing import List, Optional, Tuple, Union -import warnings - -from haystack import Document -from haystack.environment import HAYSTACK_REMOTE_API_TIMEOUT_SEC -from haystack.nodes.answer_generator import BaseGenerator -from haystack.nodes.prompt import PromptTemplate -from haystack.utils.openai_utils import ( - load_openai_tokenizer, - openai_request, - _openai_text_completion_tokenization_details, - _check_openai_finish_reason, - check_openai_policy_violation, -) - -logger = logging.getLogger(__name__) - -OPENAI_TIMEOUT = float(os.environ.get(HAYSTACK_REMOTE_API_TIMEOUT_SEC, 30)) - - -class OpenAIAnswerGenerator(BaseGenerator): - """ - This component is now deprecated and will be removed in future versions. - Use `PromptNode` instead of `OpenAIAnswerGenerator`, - as explained in https://haystack.deepset.ai/tutorials/22_pipeline_with_promptnode. - - Uses the GPT-3 models from the OpenAI API to generate Answers based on the Documents it receives. - The Documents can come from a Retriever or you can supply them manually. - - To use this Node, you need an API key from an active OpenAI account. You can sign-up for an account - on the [OpenAI API website](https://openai.com/api/). - """ - - def __init__( - self, - api_key: str, - azure_base_url: Optional[str] = None, - azure_deployment_name: Optional[str] = None, - model: str = "text-davinci-003", - max_tokens: int = 50, - api_version: str = "2022-12-01", - top_k: int = 5, - temperature: float = 0.2, - presence_penalty: float = 0.1, - frequency_penalty: float = 0.1, - examples_context: Optional[str] = None, - examples: Optional[List[List[str]]] = None, - stop_words: Optional[List[str]] = None, - progress_bar: bool = True, - prompt_template: Optional[PromptTemplate] = None, - context_join_str: str = " ", - moderate_content: bool = False, - api_base: str = "https://api.openai.com/v1", - openai_organization: Optional[str] = None, - ): - """ - :param api_key: Your API key from OpenAI. It is required for this node to work. - :param azure_base_url: The base URL for the Azure OpenAI API. If not supplied, Azure OpenAI API will not be used. - This parameter is an OpenAI Azure endpoint, usually in the form `https://.openai.azure.com`. - :param azure_deployment_name: The name of the Azure OpenAI API deployment. If not supplied, Azure OpenAI API will not be used. - :param model: ID of the engine to use for generating the answer. You can select one of `"text-ada-001"`, - `"text-babbage-001"`, `"text-curie-001"`, or `"text-davinci-003"` - (from worst to best and from cheapest to most expensive). For more information about the models, - refer to the [OpenAI Documentation](https://platform.openai.com/docs/models/gpt-3). - :param max_tokens: The maximum number of tokens reserved for the generated Answer. - A higher number allows for longer answers without exceeding the max prompt length of the OpenAI model. - A lower number allows longer prompts with more documents passed as context, but the generated answer might be cut after max_tokens. - :param api_version: The version of the Azure OpenAI API to use. The default is `2022-12-01` version. - :param top_k: Number of generated Answers. - :param temperature: What sampling temperature to use. Higher values mean the model will take more risks and - value 0 (argmax sampling) works better for scenarios with a well-defined Answer. - :param presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they have already appeared - in the text. This increases the model's likelihood to talk about new topics. For more information about frequency and presence penalties, see - [parameter details in OpenAI](https://platform.openai.com/docs/api-reference/parameter-details). - :param frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing - frequency in the text so far, decreasing the model's likelihood to repeat the same line - verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/api-reference/parameter-details) - :param examples_context: A text snippet containing the contextual information used to generate the Answers for - the examples you provide. - If not supplied, the default from OpenAI API docs is used: - `"In 2017, U.S. life expectancy was 78.6 years."` - :param examples: List of (question, answer) pairs that helps steer the model towards the tone and answer - format you'd like. We recommend adding 2 to 3 examples. - If not supplied, the default from OpenAI API docs is used: - `[["Q: What is human life expectancy in the United States?", "A: 78 years."]]` - :param stop_words: Up to four sequences where the API stops generating further tokens. The returned text does not contain the stop sequence. - If you don't provide any stop words, the default value from OpenAI API docs is used: `["\\n", "<|endoftext|>"]`. - :param prompt_template: A PromptTemplate that tells the model how to generate answers given a - `context` and `query` supplied at runtime. The `context` is automatically constructed at runtime from a - list of provided documents. Use `example_context` and a list of `examples` to provide the model with examples to steer it towards the tone and answer format you would like. - If not supplied, the default prompt template is: - ```python - PromptTemplate( - "Please answer the question according to the above context." - "\\n===\\nContext: {examples_context}\\n===\\n{examples}\\n\\n" - "===\\nContext: {context}\\n===\\n{query}", - ) - ``` - To learn how variables, such as '{context}', are substituted in the prompt text, see - [PromptTemplate](https://docs.haystack.deepset.ai/docs/prompt_node#template-structure). - :param context_join_str: The separation string used to join the input documents to create the context - used by the PromptTemplate. - :param moderate_content: Whether to filter input and generated answers for potentially sensitive content - using the [OpenAI Moderation API](https://platform.openai.com/docs/guides/moderation). If the input or - answers are flagged, an empty list is returned in place of the answers. - :param api_base: The base URL for the OpenAI API, defaults to `"https://api.openai.com/v1"`. - :param openai_organization: The OpenAI-Organization ID, defaults to `None`. For more details, see see OpenAI - [documentation](https://platform.openai.com/docs/api-reference/requesting-organization). - """ - - warnings.warn( - "`OpenAIAnswerGenerator component is deprecated and will be removed in future versions. Use `PromptNode` " - "instead of `OpenAIAnswerGenerator`.", - category=DeprecationWarning, - ) - - super().__init__(progress_bar=progress_bar) - if (examples is None and examples_context is not None) or (examples is not None and examples_context is None): - logger.warning( - "If providing examples or examples_context, we recommend providing both of them " - "so the examples correctly refer to the examples_context." - ) - if examples_context is None: - examples_context = "In 2017, U.S. life expectancy was 78.6 years." - if examples is None: - examples = [["Q: What is human life expectancy in the United States?", "A: 78 years."]] - if stop_words is None: - stop_words = ["\n", "<|endoftext|>"] - if prompt_template is None: - prompt_template = PromptTemplate( - "Please answer the question according to the above context." - "\n===\nContext: {examples_context}\n===\n{examples}\n\n" - "===\nContext: {context}\n===\n{query}" - ) - else: - # Check for required prompts - required_params = ["context", "query"] - if not all(p in prompt_template.prompt_params for p in required_params): - raise ValueError( - "The OpenAIAnswerGenerator requires a PromptTemplate that has `context` and " - "`query` in its `prompt_params`. Supply a different `prompt_template` or " - "use the default one." - ) - - # Check for unsupported prompt parameters - optional_params = ["examples_context", "examples"] - unknown_params = [] - for p in prompt_template.prompt_params: - if p not in set(required_params + optional_params): - unknown_params.append(p) - if len(unknown_params) > 1: - raise ValueError( - f"The provided PromptTemplate has the prompt parameters, {unknown_params}, that are not supported " - f"by the OpenAIAnswerGenerator. The only prompt parameters that are supported are " - f"`examples_context`, `examples`, `context`, and `query`." - ) - - self.api_key = api_key - self.azure_base_url = azure_base_url - self.azure_deployment_name = azure_deployment_name - self.api_version = api_version - self.api_base = api_base - self.model = model - self.max_tokens = max_tokens - self.top_k = top_k - self.temperature = temperature - self.presence_penalty = presence_penalty - self.frequency_penalty = frequency_penalty - self.examples_context = examples_context - self.examples = examples - self.stop_words = stop_words - self.prompt_template = prompt_template - self.context_join_str = context_join_str - self.using_azure = self.azure_deployment_name is not None and self.azure_base_url is not None - self.moderate_content = moderate_content - self.openai_organization = openai_organization - - tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=self.model) - - self.MAX_TOKENS_LIMIT = max_tokens_limit - self._tokenizer = load_openai_tokenizer(tokenizer_name=tokenizer_name) - - def predict( - self, - query: str, - documents: List[Document], - top_k: Optional[int] = None, - max_tokens: Optional[int] = None, - timeout: Union[float, Tuple[float, float]] = OPENAI_TIMEOUT, - ): - """ - Use the loaded QA model to generate Answers for a query based on the Documents it receives. - - Returns dictionaries containing Answers. - Note that OpenAI doesn't return scores for those Answers. - - Example: - ```python - { - 'query': 'Who is the father of Arya Stark?', - 'answers':[Answer( - 'answer': 'Eddard,', - 'score': None, - ),... - ] - } - ``` - - :param query: The query you want to provide. It's a string. - :param documents: List of Documents in which to search for the Answer. - :param top_k: The maximum number of Answers to return. - :param max_tokens: The maximum number of tokens the generated Answer can have. - :param timeout: How many seconds to wait for the server to send data before giving up, - as a float, or a :ref:`(connect timeout, read timeout) ` tuple. - Defaults to 10 seconds. - :return: Dictionary containing query and Answers. - """ - if top_k is None: - top_k = self.top_k - - # convert input to OpenAI format - prompt, input_docs = self._build_prompt_within_max_length(query=query, documents=documents) - logger.debug("Prompt being sent to OpenAI API with prompt %s.", prompt) - - payload = { - "model": self.model, - "prompt": prompt, - "max_tokens": max_tokens or self.max_tokens, - "stop": self.stop_words, - "n": top_k, - "temperature": self.temperature, - "presence_penalty": self.presence_penalty, - "frequency_penalty": self.frequency_penalty, - } - if self.using_azure: - url = f"{self.azure_base_url}/openai/deployments/{self.azure_deployment_name}/completions?api-version={self.api_version}" - else: - url = f"{self.api_base}/completions" - - headers = {"Content-Type": "application/json"} - if self.using_azure: - headers["api-key"] = self.api_key - else: - headers["Authorization"] = f"Bearer {self.api_key}" - if self.openai_organization: - headers["OpenAI-Organization"] = self.openai_organization - - if self.moderate_content and check_openai_policy_violation(input=prompt, headers=headers): - logger.info("Prompt '%s' will not be sent to OpenAI due to potential policy violation.", prompt) - return {"query": query, "answers": []} - - logger.debug("Prompt being sent to OpenAI API with prompt %s.", prompt) - res = openai_request(url=url, headers=headers, payload=payload, timeout=timeout) - _check_openai_finish_reason(result=res, payload=payload) - generated_answers = [ans["text"] for ans in res["choices"]] - if self.moderate_content and check_openai_policy_violation(input=generated_answers, headers=headers): - logger.info( - "Generated answers '%s' will not be returned due to potential policy violation.", generated_answers - ) - return {"query": query, "answers": []} - answers = self._create_answers(generated_answers, input_docs, prompt=prompt) - result = {"query": query, "answers": answers} - return result - - @staticmethod - def _create_context(documents: List[Document], join_str: str = " ") -> str: - """Join the documents to create a single context to be used in the PromptTemplate.""" - doc_contents = [doc.content for doc in documents] - # We reverse the docs to put the most relevant documents at the bottom of the context - context = join_str.join(reversed(doc_contents)) - return context - - def _fill_prompt(self, query: str, documents: List[Document]) -> str: - """Fills in the `prompt_template` with its `prompt_params` and returns the full prompt.""" - example_prompts = "\n---\n".join([f"{query}\n{answer}" for query, answer in self.examples]) - qa_prompt = f"Q: {query}\nA:" - - kwargs = {"context": self._create_context(documents, join_str=self.context_join_str), "query": qa_prompt} - if ( - "examples_context" in self.prompt_template.prompt_params - and "examples" in self.prompt_template.prompt_params - ): - kwargs["examples_context"] = self.examples_context - kwargs["examples"] = example_prompts - full_prompt = next(self.prompt_template.fill(**kwargs)) - return full_prompt - - def _build_prompt_within_max_length(self, query: str, documents: List[Document]) -> Tuple[str, List[Document]]: - """ - Builds the prompt for the GPT-3 model so that it can generate an Answer. If the prompt is too long based on the - MAX_TOKENS_LIMIT of the OpenAI model and `max_tokens` you specify, then documents (used to - construct the context) are thrown away until the prompt length fits within the MAX_TOKENS_LIMIT. - """ - full_prompt = self._fill_prompt(query, documents) - n_full_prompt_tokens = len(self._tokenizer.encode(full_prompt)) - - # for length restrictions of prompt see: https://platform.openai.com/docs/api-reference/completions/create#completions/create-max_tokens - leftover_token_len = self.MAX_TOKENS_LIMIT - n_full_prompt_tokens - self.max_tokens - - # Trim down the prompt (by removing documents) until it fits the models MAX_TOKENS_LIMIT - input_docs = documents - skipped_docs = 0 - # If leftover_token_len is negative we have gone past the MAX_TOKENS_LIMIT and the prompt must be trimmed - if leftover_token_len < 0: - n_skipped_tokens = 0 - # Reversing the order of documents b/c we want to throw away less relevant docs first - for doc in reversed(documents): - skipped_docs += 1 - n_skipped_tokens += len(self._tokenizer.encode(doc.content)) - - # Only skip enough tokens to fit within the MAX_TOKENS_LIMIT - if n_skipped_tokens >= abs(leftover_token_len): - break - - # Throw away least relevant docs - input_docs = documents[:-skipped_docs] - full_prompt = self._fill_prompt(query, input_docs) - n_full_prompt_tokens = len(self._tokenizer.encode(full_prompt)) - - if len(input_docs) == 0: - logger.warning( - "Skipping all of the provided Documents, as none of them fits the maximum token limit of %s. " - "The generated answers will therefore not be conditioned on any context.", - self.MAX_TOKENS_LIMIT, - ) - elif skipped_docs >= 1: - logger.warning( - "Skipping %s of the provided Documents, as using them would exceed the maximum token limit of %s.", - skipped_docs, - self.MAX_TOKENS_LIMIT, - ) - - logger.debug("Number of tokens in full prompt: %s", n_full_prompt_tokens) - logger.debug("Full prompt: %s", full_prompt) - return full_prompt, input_docs diff --git a/test/nodes/test_generator.py b/test/nodes/test_generator.py deleted file mode 100644 index f69663aeba..0000000000 --- a/test/nodes/test_generator.py +++ /dev/null @@ -1,208 +0,0 @@ -from unittest.mock import patch, create_autospec - -import pytest -from haystack import Pipeline -from haystack.schema import Document, Answer -from haystack.nodes.answer_generator import OpenAIAnswerGenerator -from haystack.nodes import PromptTemplate - -from ..conftest import fail_at_version - -import logging - - -@pytest.mark.unit -@fail_at_version(1, 23) -@patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer") -def test_openaianswergenerator_deprecation(mock_load_tokenizer): - with pytest.warns(DeprecationWarning): - OpenAIAnswerGenerator(api_key="fake_api_key") - - -@pytest.mark.unit -@patch("haystack.nodes.answer_generator.openai.openai_request") -def test_no_openai_organization(mock_request): - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - generator = OpenAIAnswerGenerator(api_key="fake_api_key") - assert generator.openai_organization is None - - generator.predict(query="test query", documents=[Document(content="test document")]) - assert "OpenAI-Organization" not in mock_request.call_args.kwargs["headers"] - - -@pytest.mark.unit -@patch("haystack.nodes.answer_generator.openai.openai_request") -def test_openai_organization(mock_request): - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - generator = OpenAIAnswerGenerator(api_key="fake_api_key", openai_organization="fake_organization") - assert generator.openai_organization == "fake_organization" - - generator.predict(query="test query", documents=[Document(content="test document")]) - assert mock_request.call_args.kwargs["headers"]["OpenAI-Organization"] == "fake_organization" - - -@pytest.mark.unit -@patch("haystack.nodes.answer_generator.openai.openai_request") -def test_openai_answer_generator_default_api_base(mock_request): - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - generator = OpenAIAnswerGenerator(api_key="fake_api_key") - assert generator.api_base == "https://api.openai.com/v1" - generator.predict(query="test query", documents=[Document(content="test document")]) - assert mock_request.call_args.kwargs["url"] == "https://api.openai.com/v1/completions" - - -@pytest.mark.unit -@patch("haystack.nodes.answer_generator.openai.openai_request") -def test_openai_answer_generator_custom_api_base(mock_request): - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - generator = OpenAIAnswerGenerator(api_key="fake_api_key", api_base="https://fake_api_base.com") - assert generator.api_base == "https://fake_api_base.com" - generator.predict(query="test query", documents=[Document(content="test document")]) - assert mock_request.call_args.kwargs["url"] == "https://fake_api_base.com/completions" - - -@pytest.mark.integration -@pytest.mark.parametrize("haystack_openai_config", ["openai", "azure"], indirect=True) -def test_openai_answer_generator(haystack_openai_config, docs): - if not haystack_openai_config: - pytest.skip("No API key found, skipping test") - - openai_generator = OpenAIAnswerGenerator( - api_key=haystack_openai_config["api_key"], - azure_base_url=haystack_openai_config.get("azure_base_url", None), - azure_deployment_name=haystack_openai_config.get("azure_deployment_name", None), - model="text-babbage-001", - top_k=1, - ) - prediction = openai_generator.predict(query="Who lives in Berlin?", documents=docs, top_k=1) - assert len(prediction["answers"]) == 1 - assert "Carla" in prediction["answers"][0].answer - - -@pytest.mark.integration -@pytest.mark.parametrize("haystack_openai_config", ["openai", "azure"], indirect=True) -def test_openai_answer_generator_custom_template(haystack_openai_config, docs): - if not haystack_openai_config: - pytest.skip("No API key found, skipping test") - - lfqa_prompt = PromptTemplate( - """Synthesize a comprehensive answer from your knowledge and the following topk most relevant paragraphs and - the given question.\n===\\Paragraphs: {context}\n===\n{query}""" - ) - node = OpenAIAnswerGenerator( - api_key=haystack_openai_config["api_key"], - azure_base_url=haystack_openai_config.get("azure_base_url", None), - azure_deployment_name=haystack_openai_config.get("azure_deployment_name", None), - model="text-babbage-001", - top_k=1, - prompt_template=lfqa_prompt, - ) - prediction = node.predict(query="Who lives in Berlin?", documents=docs, top_k=1) - assert len(prediction["answers"]) == 1 - - -@pytest.mark.integration -@pytest.mark.parametrize("haystack_openai_config", ["openai", "azure"], indirect=True) -def test_openai_answer_generator_max_token(haystack_openai_config, docs, caplog): - if not haystack_openai_config: - pytest.skip("No API key found, skipping test") - - openai_generator = OpenAIAnswerGenerator( - api_key=haystack_openai_config["api_key"], - azure_base_url=haystack_openai_config.get("azure_base_url", None), - azure_deployment_name=haystack_openai_config.get("azure_deployment_name", None), - model="text-babbage-001", - top_k=1, - ) - openai_generator.MAX_TOKENS_LIMIT = 116 - with caplog.at_level(logging.INFO): - prediction = openai_generator.predict(query="Who lives in Berlin?", documents=docs, top_k=1) - assert "Skipping all of the provided Documents" in caplog.text - assert len(prediction["answers"]) == 1 - # Can't easily check content of answer since it is generative and can change between runs - - -# mock tokenizer that splits the string -class MockTokenizer: - def encode(self, *args, **kwargs): - return str.split(*args, **kwargs) - - def tokenize(self, *args, **kwargs): - return str.split(*args, **kwargs) - - -@pytest.mark.unit -def test_build_prompt_within_max_length(): - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer") as mock_load_tokenizer: - mock_load_tokenizer.return_value = MockTokenizer() - - generator = OpenAIAnswerGenerator(api_key="fake_key", max_tokens=50) - generator.MAX_TOKENS_LIMIT = 92 - query = "query" - documents = [Document("most relevant document"), Document("less relevant document")] - prompt_str, prompt_docs = generator._build_prompt_within_max_length(query=query, documents=documents) - - assert len(prompt_docs) == 1 - assert prompt_docs[0] == documents[0] - - -@pytest.mark.unit -def test_openai_answer_generator_pipeline_max_tokens(): - """ - tests that the max_tokens parameter is passed to the generator component in the pipeline - """ - question = "What is New York City like?" - mocked_response = "Forget NYC, I was generated by the mock method." - nyc_docs = [Document(content="New York is a cool and amazing city to live in the United States of America.")] - pipeline = Pipeline() - - # mock load_openai_tokenizer to avoid accessing the internet to init tiktoken - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - openai_generator = OpenAIAnswerGenerator(api_key="fake_api_key", model="text-babbage-001", top_k=1) - - pipeline.add_node(component=openai_generator, name="generator", inputs=["Query"]) - openai_generator.run = create_autospec(openai_generator.run) - openai_generator.run.return_value = ({"answers": mocked_response}, "output_1") - - result = pipeline.run(query=question, documents=nyc_docs, params={"generator": {"max_tokens": 3}}) - assert result["answers"] == mocked_response - openai_generator.run.assert_called_with(query=question, documents=nyc_docs, max_tokens=3) - - -@pytest.mark.unit -@patch("haystack.nodes.answer_generator.openai.OpenAIAnswerGenerator.predict") -def test_openai_answer_generator_run_with_labels_and_isolated_node_eval(patched_predict, eval_labels): - label = eval_labels[0] - query = label.query - document = label.labels[0].document - - patched_predict.return_value = { - "answers": [Answer(answer=label.labels[0].answer.answer, document_ids=[document.id])] - } - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - openai_generator = OpenAIAnswerGenerator(api_key="fake_api_key", model="text-babbage-001", top_k=1) - result, _ = openai_generator.run(query=query, documents=[document], labels=label, add_isolated_node_eval=True) - - assert "answers_isolated" in result - - -@pytest.mark.unit -@patch("haystack.nodes.answer_generator.base.BaseGenerator.predict_batch") -def test_openai_answer_generator_run_batch_with_labels_and_isolated_node_eval(patched_predict_batch, eval_labels): - queries = [label.query for label in eval_labels] - documents = [[label.labels[0].document] for label in eval_labels] - - patched_predict_batch.return_value = { - "queries": queries, - "answers": [ - [Answer(answer=label.labels[0].answer.answer, document_ids=[label.labels[0].document.id])] - for label in eval_labels - ], - } - with patch("haystack.nodes.answer_generator.openai.load_openai_tokenizer"): - openai_generator = OpenAIAnswerGenerator(api_key="fake_api_key", model="text-babbage-001", top_k=1) - result, _ = openai_generator.run_batch( - queries=queries, documents=documents, labels=eval_labels, add_isolated_node_eval=True - ) - - assert "answers_isolated" in result diff --git a/test/pipelines/test_eval.py b/test/pipelines/test_eval.py index 19b952c14d..210460ea3f 100644 --- a/test/pipelines/test_eval.py +++ b/test/pipelines/test_eval.py @@ -8,7 +8,6 @@ import responses from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore -from haystack.nodes.answer_generator.openai import OpenAIAnswerGenerator from haystack.nodes.preprocessor import PreProcessor from haystack.nodes.prompt.prompt_node import PromptNode from haystack.nodes.query_classifier.transformers import TransformersQueryClassifier @@ -596,109 +595,6 @@ def test_extractive_qa_eval(reader, retriever_with_docs, tmp_path, eval_labels): assert isinstance(value, float) -@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True) -@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True) -@responses.activate -def test_generative_qa_eval(retriever_with_docs, tmp_path, eval_labels): - labels = eval_labels[:1] - responses.add( - responses.POST, - "https://api.openai.com/v1/completions", - json={"choices": [{"text": "test", "finish_reason": "stop"}, {"text": "test2", "finish_reason": "stop"}]}, - status=200, - ) - responses.add_passthru("https://openaipublic.blob.core.windows.net") - generator = OpenAIAnswerGenerator(api_key="dummy", top_k=2) - pipeline = GenerativeQAPipeline(generator=generator, retriever=retriever_with_docs) - eval_result = pipeline.eval(labels=labels, params={"Retriever": {"top_k": 5}}) - - metrics = eval_result.calculate_metrics(document_scope="document_id") - - generator_result = eval_result["Generator"] - retriever_result = eval_result["Retriever"] - - expected_generator_result_columns = [ - "answer", # answer-specific - "exact_match", # answer-specific - "f1", # answer-specific - # "sas", # answer-specific optional - "exact_match_context_scope", # answer-specific - "f1_context_scope", # answer-specific - # "sas_context_scope", # answer-specific optional - "exact_match_document_id_scope", # answer-specific - "f1_document_id_scope", # answer-specific - # "sas_document_id_scope", # answer-specific optional - "exact_match_document_id_and_context_scope", # answer-specific - "f1_document_id_and_context_scope", # answer-specific - # "sas_document_id_and_context_scope", # answer-specific optional - "offsets_in_document", # answer-specific - "gold_offsets_in_documents", # answer-specific - "offsets_in_context", # answer-specific - "gold_offsets_in_contexts", # answer-specific - "gold_answers_exact_match", # answer-specific - "gold_answers_f1", # answer-specific - # "gold_answers_sas", # answer-specific optional - "document_ids", # answer-specific - "prompt", # answer-specific - ] - - expected_retriever_result_columns = [ - "gold_id_match", # doc-specific - "context_match", # doc-specific - "answer_match", # doc-specific - "gold_id_or_answer_match", # doc-specific - "gold_id_and_answer_match", # doc-specific - "gold_id_or_context_match", # doc-specific - "gold_id_and_context_match", # doc-specific - "gold_id_and_context_and_answer_match", # doc-specific - "context_and_answer_match", # doc-specific - "gold_answers_match", # doc-specific, - "document_id", # doc-specific - ] - - expected_generic_result_columns = [ - "multilabel_id", # generic - "query", # generic - "filters", # generic - "context", # generic - "gold_contexts", # generic - "gold_documents_id_match", # generic - "gold_contexts_similarity", # generic - "type", # generic - "node", # generic - "eval_mode", # generic - "rank", # generic - "gold_document_ids", # generic - "gold_answers", # generic - # "custom_document_id", # generic optional - # "gold_custom_document_ids", # generic optional - ] - - # all expected columns are part of the evaluation result dataframe - assert sorted(expected_generator_result_columns + expected_generic_result_columns + ["index"]) == sorted( - generator_result.columns - ) - assert sorted(expected_retriever_result_columns + expected_generic_result_columns + ["index"]) == sorted( - retriever_result.columns - ) - - assert generator_result["prompt"].iloc[0] is not None - - # assert metrics are floats - for node_metrics in metrics.values(): - for value in node_metrics.values(): - assert isinstance(value, float) - - eval_result.save(tmp_path) - saved_eval_result = EvaluationResult.load(tmp_path) - - for key, df in eval_result.node_results.items(): - pd.testing.assert_frame_equal(df, saved_eval_result[key]) - - loaded_metrics = saved_eval_result.calculate_metrics(document_scope="document_id") - assert metrics == loaded_metrics - - @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True) @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True) def test_generative_qa_w_promptnode_eval(retriever_with_docs, tmp_path, eval_labels): From 4b818f646e146011ec469ce5c22dca990eac5f27 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 12 Dec 2023 12:28:34 +0100 Subject: [PATCH 2/7] reno --- .../notes/remove_answer_generator-e7100f82c1859fcb.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml diff --git a/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml b/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml new file mode 100644 index 0000000000..d84d71c0a1 --- /dev/null +++ b/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml @@ -0,0 +1,3 @@ +--- +deprecations: + - Remove deprecated `OpenAIAnswerGenerator`. From b2d23e065fc69146442ee2cb98ed848b661f5109 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 12 Dec 2023 12:39:55 +0100 Subject: [PATCH 3/7] Remove BaseGenerator and GenerativeQAPipeline --- docs/pydoc/config/base-generator.yml | 27 ----------------- haystack/pipelines/base.py | 5 +--- haystack/pipelines/standard_pipelines.py | 30 ------------------- ...ove_answer_generator-e7100f82c1859fcb.yaml | 2 +- test/conftest.py | 6 ---- 5 files changed, 2 insertions(+), 68 deletions(-) delete mode 100644 docs/pydoc/config/base-generator.yml diff --git a/docs/pydoc/config/base-generator.yml b/docs/pydoc/config/base-generator.yml deleted file mode 100644 index 41d8fd8907..0000000000 --- a/docs/pydoc/config/base-generator.yml +++ /dev/null @@ -1,27 +0,0 @@ -loaders: - - type: python - search_path: [../../../haystack/nodes/answer_generator] - modules: ["base"] - ignore_when_discovered: ["__init__"] -processors: - - type: filter - expression: - documented_only: true - do_not_filter_modules: false - skip_empty_modules: true - - type: smart - - type: crossref -renderer: - type: renderers.ReadmeRenderer - excerpt: Abstract class for Generators. - category_slug: haystack-classes - title: BaseGenerator API - slug: basegenerator-api - order: 7 - parent_doc_slug: answer-generator-api - markdown: - descriptive_class_title: false - descriptive_module_title: true - add_method_class_prefix: true - add_member_class_prefix: false - filename: basegenerator_api.md diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 94fa96aaf9..012f84ce64 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -39,7 +39,7 @@ from haystack.utils.deepsetcloud import DeepsetCloud from haystack.schema import Answer, EvaluationResult, MultiLabel, Document, Span from haystack.errors import HaystackError, PipelineError, PipelineConfigError, DocumentStoreError -from haystack.nodes import BaseGenerator, Docs2Answers, BaseReader, BaseSummarizer, BaseTranslator, QuestionGenerator +from haystack.nodes import Docs2Answers, BaseReader, BaseSummarizer, BaseTranslator, QuestionGenerator from haystack.nodes.base import BaseComponent, RootNode from haystack.nodes.retriever.base import BaseRetriever from haystack.document_stores.base import BaseDocumentStore @@ -2528,9 +2528,6 @@ def get_type(self) -> str: pipeline_types = { # QuestionGenerationPipeline has only one component, which is a QuestionGenerator "QuestionGenerationPipeline": lambda x: all(isinstance(x, QuestionGenerator) for x in x.values()), - # GenerativeQAPipeline has at least BaseGenerator and BaseRetriever components - "GenerativeQAPipeline": lambda x: any(isinstance(x, BaseRetriever) for x in x.values()) - and any(isinstance(x, BaseGenerator) for x in x.values()), # FAQPipeline has at least one Docs2Answers component "FAQPipeline": lambda x: any(isinstance(x, Docs2Answers) for x in x.values()), # ExtractiveQAPipeline has at least one BaseRetriever component and one BaseReader component diff --git a/haystack/pipelines/standard_pipelines.py b/haystack/pipelines/standard_pipelines.py index e1f8f61c72..0f2073cd3d 100644 --- a/haystack/pipelines/standard_pipelines.py +++ b/haystack/pipelines/standard_pipelines.py @@ -6,7 +6,6 @@ from typing import Any, Dict, List, Optional, Union, Literal from haystack.document_stores.base import BaseDocumentStore, FilterType -from haystack.nodes.answer_generator.base import BaseGenerator from haystack.nodes.other.docs2answers import Docs2Answers from haystack.nodes.other.document_merger import DocumentMerger from haystack.nodes.question_generator.question_generator import QuestionGenerator @@ -411,35 +410,6 @@ def run(self, query: str, params: Optional[dict] = None, debug: Optional[bool] = return output -class GenerativeQAPipeline(BaseStandardPipeline): - """ - Pipeline for Generative Question Answering. - """ - - def __init__(self, generator: BaseGenerator, retriever: BaseRetriever): - """ - :param generator: Generator instance - :param retriever: Retriever instance - """ - self.pipeline = Pipeline() - self.pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"]) - self.pipeline.add_node(component=generator, name="Generator", inputs=["Retriever"]) - - def run(self, query: str, params: Optional[dict] = None, debug: Optional[bool] = None): - """ - :param query: the query string. - :param params: params for the `retriever` and `generator`. For instance, - params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}} - :param debug: Whether the pipeline should instruct nodes to collect debug information - about their execution. By default these include the input parameters - they received and the output they generated. - All debug information can then be found in the dict returned - by this method under the key "_debug" - """ - output = self.pipeline.run(query=query, params=params, debug=debug) - return output - - class SearchSummarizationPipeline(BaseStandardPipeline): """ Pipeline that retrieves documents for a query and then summarizes those documents. diff --git a/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml b/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml index d84d71c0a1..f551d56a72 100644 --- a/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml +++ b/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml @@ -1,3 +1,3 @@ --- deprecations: - - Remove deprecated `OpenAIAnswerGenerator`. + - Remove deprecated `OpenAIAnswerGenerator`, `BaseGenerator`, `GenerativeQAPipeline` and related tests. diff --git a/test/conftest.py b/test/conftest.py index 9343e56b73..06f89a400c 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -28,7 +28,6 @@ from haystack.nodes import ( BaseReader, BaseRetriever, - BaseGenerator, BaseSummarizer, BaseTranslator, DenseRetriever, @@ -321,11 +320,6 @@ def embed_documents(self, documents: List[Document]): return np.full((len(documents), 768), 0.5) -class MockSeq2SegGenerator(BaseGenerator): - def predict(self, query: str, documents: List[Document], top_k: Optional[int], max_tokens: Optional[int]) -> Dict: - pass - - class MockSummarizer(BaseSummarizer): def predict_batch( self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None From 4e16ec6bede4d0ea42a27e30f40718e3fd8f4980 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 12 Dec 2023 15:40:38 +0100 Subject: [PATCH 4/7] remove more references --- haystack/agents/base.py | 2 -- haystack/pipelines/__init__.py | 1 - test/pipelines/test_eval.py | 2 +- test/pipelines/test_eval_batch.py | 2 +- test/pipelines/test_pipeline.py | 4 ---- 5 files changed, 2 insertions(+), 9 deletions(-) diff --git a/haystack/agents/base.py b/haystack/agents/base.py index da6d1d61ba..f8b2504128 100644 --- a/haystack/agents/base.py +++ b/haystack/agents/base.py @@ -19,7 +19,6 @@ BaseStandardPipeline, ExtractiveQAPipeline, DocumentSearchPipeline, - GenerativeQAPipeline, SearchSummarizationPipeline, FAQPipeline, TranslationWrapperPipeline, @@ -57,7 +56,6 @@ def __init__( Pipeline, ExtractiveQAPipeline, DocumentSearchPipeline, - GenerativeQAPipeline, SearchSummarizationPipeline, FAQPipeline, TranslationWrapperPipeline, diff --git a/haystack/pipelines/__init__.py b/haystack/pipelines/__init__.py index ca770cd05a..a7a414e79a 100644 --- a/haystack/pipelines/__init__.py +++ b/haystack/pipelines/__init__.py @@ -9,7 +9,6 @@ MostSimilarDocumentsPipeline, QuestionAnswerGenerationPipeline, RetrieverQuestionGenerationPipeline, - GenerativeQAPipeline, ExtractiveQAPipeline, FAQPipeline, TextIndexingPipeline, diff --git a/test/pipelines/test_eval.py b/test/pipelines/test_eval.py index 210460ea3f..5dbca833be 100644 --- a/test/pipelines/test_eval.py +++ b/test/pipelines/test_eval.py @@ -15,7 +15,7 @@ from haystack.nodes.retriever.sparse import BM25Retriever from haystack.nodes.summarizer.transformers import TransformersSummarizer from haystack.pipelines.base import Pipeline -from haystack.pipelines import ExtractiveQAPipeline, GenerativeQAPipeline, SearchSummarizationPipeline +from haystack.pipelines import ExtractiveQAPipeline, SearchSummarizationPipeline from haystack.pipelines.standard_pipelines import ( DocumentSearchPipeline, FAQPipeline, diff --git a/test/pipelines/test_eval_batch.py b/test/pipelines/test_eval_batch.py index e765ba292b..1ea2de69cb 100644 --- a/test/pipelines/test_eval_batch.py +++ b/test/pipelines/test_eval_batch.py @@ -12,7 +12,7 @@ from haystack.nodes.retriever.sparse import BM25Retriever from haystack.nodes.summarizer.transformers import TransformersSummarizer from haystack.pipelines.base import Pipeline -from haystack.pipelines import ExtractiveQAPipeline, GenerativeQAPipeline, SearchSummarizationPipeline +from haystack.pipelines import ExtractiveQAPipeline, SearchSummarizationPipeline from haystack.pipelines.standard_pipelines import ( DocumentSearchPipeline, FAQPipeline, diff --git a/test/pipelines/test_pipeline.py b/test/pipelines/test_pipeline.py index 6c3724062a..985947691c 100644 --- a/test/pipelines/test_pipeline.py +++ b/test/pipelines/test_pipeline.py @@ -23,7 +23,6 @@ from haystack.pipelines import ( Pipeline, RootNode, - GenerativeQAPipeline, FAQPipeline, ExtractiveQAPipeline, SearchSummarizationPipeline, @@ -694,9 +693,6 @@ def test_generate_code_can_handle_weak_cyclic_pipelines(): @pytest.mark.unit def test_pipeline_classify_type(tmp_path): - pipe = GenerativeQAPipeline(generator=MockSeq2SegGenerator(), retriever=MockRetriever()) - assert pipe.get_type().startswith("GenerativeQAPipeline") - pipe = FAQPipeline(retriever=MockRetriever()) assert pipe.get_type().startswith("FAQPipeline") From afcf62f4854bfc2affd6dcb27b33b7f911d2c37c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 12 Dec 2023 15:42:09 +0100 Subject: [PATCH 5/7] docs --- docs/pydoc/config/answer-generator.yml | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 docs/pydoc/config/answer-generator.yml diff --git a/docs/pydoc/config/answer-generator.yml b/docs/pydoc/config/answer-generator.yml deleted file mode 100644 index 285c2d0678..0000000000 --- a/docs/pydoc/config/answer-generator.yml +++ /dev/null @@ -1,26 +0,0 @@ -loaders: - - type: loaders.CustomPythonLoader - search_path: [../../../haystack/nodes/answer_generator] - modules: ["openai"] - ignore_when_discovered: ["__init__"] -processors: - - type: filter - expression: - documented_only: true - do_not_filter_modules: false - skip_empty_modules: true - - type: smart - - type: crossref -renderer: - type: renderers.ReadmeRenderer - excerpt: Reads a set of documents and generates an answer to a question, word by word - category_slug: haystack-classes - title: Answer Generator API - slug: answer-generator-api - order: 5 - markdown: - descriptive_class_title: false - descriptive_module_title: true - add_method_class_prefix: true - add_member_class_prefix: false - filename: answer_generator_api.md From 1278fcbbb63e483d4982aeb394710dea705effb7 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 13 Dec 2023 10:40:43 +0100 Subject: [PATCH 6/7] leftover --- test/pipelines/test_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/pipelines/test_pipeline.py b/test/pipelines/test_pipeline.py index 985947691c..d7420748dd 100644 --- a/test/pipelines/test_pipeline.py +++ b/test/pipelines/test_pipeline.py @@ -46,7 +46,6 @@ DC_API_KEY, DC_TEST_INDEX, MockDocumentStore, - MockSeq2SegGenerator, MockRetriever, MockNode, deepset_cloud_fixture, From bac709bed70d85e9902a2cd4337e70083369327a Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 13 Dec 2023 11:37:31 +0100 Subject: [PATCH 7/7] reno --- .../notes/remove_answer_generator-e7100f82c1859fcb.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml b/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml index f551d56a72..d4180aef42 100644 --- a/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml +++ b/releasenotes/notes/remove_answer_generator-e7100f82c1859fcb.yaml @@ -1,3 +1,5 @@ --- -deprecations: - - Remove deprecated `OpenAIAnswerGenerator`, `BaseGenerator`, `GenerativeQAPipeline` and related tests. +upgrade: + - | + Remove deprecated `OpenAIAnswerGenerator`, `BaseGenerator`, `GenerativeQAPipeline` and related tests. + GenerativeQA Pipelines should use PromptNode instead. See https://haystack.deepset.ai/tutorials/22_pipeline_with_promptnode