Skip to content
This repository was archived by the owner on Nov 10, 2025. It is now read-only.

Commit dedbd8c

Browse files
committed
feat(tavily): enhance TavilyExtractorTool and TavilySearchTool with additional parameters and improved error handling
1 parent 1dbe397 commit dedbd8c

2 files changed

Lines changed: 211 additions & 167 deletions

File tree

crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py

Lines changed: 59 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -23,18 +23,6 @@ class TavilyExtractorToolSchema(BaseModel):
2323
...,
2424
description="The URL(s) to extract data from. Can be a single URL or a list of URLs.",
2525
)
26-
include_images: Optional[bool] = Field(
27-
default=False,
28-
description="Whether to include images in the extraction.",
29-
)
30-
extract_depth: Literal["basic", "advanced"] = Field(
31-
default="basic",
32-
description="The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
33-
)
34-
timeout: int = Field(
35-
default=60,
36-
description="The timeout for the extraction request in seconds.",
37-
)
3826

3927

4028
class TavilyExtractorTool(BaseTool):
@@ -49,26 +37,41 @@ class TavilyExtractorTool(BaseTool):
4937
args_schema: The schema for the tool's arguments.
5038
api_key: The Tavily API key.
5139
proxies: Optional proxies for the API requests.
40+
include_images: Whether to include images in the extraction.
41+
extract_depth: The depth of extraction.
42+
timeout: The timeout for the extraction request in seconds.
5243
"""
5344

54-
model_config = {}
55-
client: TavilyClient = None
56-
async_client: AsyncTavilyClient = None
45+
model_config = {"arbitrary_types_allowed": True}
46+
client: Optional[TavilyClient] = None
47+
async_client: Optional[AsyncTavilyClient] = None
5748
name: str = "TavilyExtractorTool"
5849
description: str = (
5950
"Extracts content from one or more web pages using the Tavily API. Returns structured data."
6051
)
6152
args_schema: Type[BaseModel] = TavilyExtractorToolSchema
6253
api_key: Optional[str] = Field(
63-
default=os.getenv("TAVILY_API_KEY"),
54+
default_factory=lambda: os.getenv("TAVILY_API_KEY"),
6455
description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
6556
)
6657
proxies: Optional[dict[str, str]] = Field(
6758
default=None,
6859
description="Optional proxies to use for the Tavily API requests.",
6960
)
61+
include_images: bool = Field(
62+
default=False,
63+
description="Whether to include images in the extraction.",
64+
)
65+
extract_depth: Literal["basic", "advanced"] = Field(
66+
default="basic",
67+
description="The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
68+
)
69+
timeout: int = Field(
70+
default=60,
71+
description="The timeout for the extraction request in seconds.",
72+
)
7073

71-
def __init__(self, **kwargs):
74+
def __init__(self, **kwargs: Any):
7275
"""
7376
Initializes the TavilyExtractorTool.
7477
@@ -82,75 +85,84 @@ def __init__(self, **kwargs):
8285
api_key=self.api_key, proxies=self.proxies
8386
)
8487
else:
85-
import click
88+
try:
89+
import click
90+
import subprocess
91+
except ImportError:
92+
raise ImportError(
93+
"The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. "
94+
"Please install 'tavily-python' manually (e.g., 'pip install tavily-python') and ensure 'click' and 'subprocess' are available."
95+
)
8696

8797
if click.confirm(
88-
"The 'tavily-python' package is required to use the TavilyExtractorTool. "
89-
"Would you like to install it?"
98+
"You are missing the 'tavily-python' package, which is required for TavilyExtractorTool. Would you like to install it?"
9099
):
91-
import subprocess
92-
93-
subprocess.run(["uv", "add", "tavily-python"], check=True)
100+
try:
101+
subprocess.run(["pip", "install", "tavily-python"], check=True)
102+
raise ImportError(
103+
"'tavily-python' has been installed. Please restart your Python application to use the TavilyExtractorTool."
104+
)
105+
except subprocess.CalledProcessError as e:
106+
raise ImportError(
107+
f"Attempted to install 'tavily-python' but failed: {e}. "
108+
f"Please install it manually to use the TavilyExtractorTool."
109+
)
94110
else:
95111
raise ImportError(
96112
"The 'tavily-python' package is required to use the TavilyExtractorTool. "
97-
"Please install it with: uv add tavily-python"
113+
"Please install it with: pip install tavily-python"
98114
)
99115

100116
def _run(
101117
self,
102118
urls: Union[List[str], str],
103-
include_images: bool = False,
104-
extract_depth: Literal["basic", "advanced"] = "basic",
105-
timeout: int = 60,
106119
) -> str:
107120
"""
108121
Synchronously extracts content from the given URL(s).
109122
110123
Args:
111124
urls: The URL(s) to extract data from.
112-
include_images: Whether to include images in the extraction.
113-
extract_depth: The depth of extraction ('basic' or 'advanced').
114-
timeout: The timeout for the request in seconds.
115125
116126
Returns:
117127
A JSON string containing the extracted data.
118128
"""
129+
if not self.client:
130+
raise ValueError(
131+
"Tavily client is not initialized. Ensure 'tavily-python' is installed and API key is set."
132+
)
133+
119134
return json.dumps(
120135
self.client.extract(
121136
urls=urls,
122-
extract_depth=extract_depth,
123-
include_images=include_images,
124-
timeout=timeout,
137+
extract_depth=self.extract_depth,
138+
include_images=self.include_images,
139+
timeout=self.timeout,
125140
),
126141
indent=2,
127142
)
128143

129144
async def _arun(
130145
self,
131146
urls: Union[List[str], str],
132-
include_images: bool = False,
133-
extract_depth: Literal["basic", "advanced"] = "basic",
134-
timeout: int = 60,
135147
) -> str:
136148
"""
137149
Asynchronously extracts content from the given URL(s).
138150
139151
Args:
140152
urls: The URL(s) to extract data from.
141-
include_images: Whether to include images in the extraction.
142-
extract_depth: The depth of extraction ('basic' or 'advanced').
143-
timeout: The timeout for the request in seconds.
144153
145154
Returns:
146155
A JSON string containing the extracted data.
147156
"""
148-
return json.dumps(
149-
self.async_client.extract(
150-
urls=urls,
151-
extract_depth=extract_depth,
152-
include_images=include_images,
153-
timeout=timeout,
154-
),
155-
indent=2,
157+
if not self.async_client:
158+
raise ValueError(
159+
"Tavily async client is not initialized. Ensure 'tavily-python' is installed and API key is set."
160+
)
161+
162+
results = await self.async_client.extract(
163+
urls=urls,
164+
extract_depth=self.extract_depth,
165+
include_images=self.include_images,
166+
timeout=self.timeout,
156167
)
168+
return json.dumps(results, indent=2)

0 commit comments

Comments (0)