@@ -23,18 +23,6 @@ class TavilyExtractorToolSchema(BaseModel):
2323 ...,
2424 description = "The URL(s) to extract data from. Can be a single URL or a list of URLs." ,
2525 )
26- include_images : Optional [bool ] = Field (
27- default = False ,
28- description = "Whether to include images in the extraction." ,
29- )
30- extract_depth : Literal ["basic" , "advanced" ] = Field (
31- default = "basic" ,
32- description = "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction." ,
33- )
34- timeout : int = Field (
35- default = 60 ,
36- description = "The timeout for the extraction request in seconds." ,
37- )
3826
3927
4028class TavilyExtractorTool (BaseTool ):
@@ -49,26 +37,41 @@ class TavilyExtractorTool(BaseTool):
4937 args_schema: The schema for the tool's arguments.
5038 api_key: The Tavily API key.
5139 proxies: Optional proxies for the API requests.
40+ include_images: Whether to include images in the extraction.
41+ extract_depth: The depth of extraction.
42+ timeout: The timeout for the extraction request in seconds.
5243 """
5344
54- model_config = {}
55- client : TavilyClient = None
56- async_client : AsyncTavilyClient = None
45+ model_config = {"arbitrary_types_allowed" : True }
46+ client : Optional [ TavilyClient ] = None
47+ async_client : Optional [ AsyncTavilyClient ] = None
5748 name : str = "TavilyExtractorTool"
5849 description : str = (
5950 "Extracts content from one or more web pages using the Tavily API. Returns structured data."
6051 )
6152 args_schema : Type [BaseModel ] = TavilyExtractorToolSchema
6253 api_key : Optional [str ] = Field (
63- default = os .getenv ("TAVILY_API_KEY" ),
54+ default_factory = lambda : os .getenv ("TAVILY_API_KEY" ),
6455 description = "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY." ,
6556 )
6657 proxies : Optional [dict [str , str ]] = Field (
6758 default = None ,
6859 description = "Optional proxies to use for the Tavily API requests." ,
6960 )
61+ include_images : bool = Field (
62+ default = False ,
63+ description = "Whether to include images in the extraction." ,
64+ )
65+ extract_depth : Literal ["basic" , "advanced" ] = Field (
66+ default = "basic" ,
67+ description = "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction." ,
68+ )
69+ timeout : int = Field (
70+ default = 60 ,
71+ description = "The timeout for the extraction request in seconds." ,
72+ )
7073
71- def __init__ (self , ** kwargs ):
74+ def __init__ (self , ** kwargs : Any ):
7275 """
7376 Initializes the TavilyExtractorTool.
7477
@@ -82,75 +85,84 @@ def __init__(self, **kwargs):
8285 api_key = self .api_key , proxies = self .proxies
8386 )
8487 else :
85- import click
88+ try :
89+ import click
90+ import subprocess
91+ except ImportError :
92+ raise ImportError (
93+ "The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. "
94+ "Please install 'tavily-python' manually (e.g., 'pip install tavily-python') and ensure 'click' and 'subprocess' are available."
95+ )
8696
8797 if click .confirm (
88- "The 'tavily-python' package is required to use the TavilyExtractorTool. "
89- "Would you like to install it?"
98+ "You are missing the 'tavily-python' package, which is required for TavilyExtractorTool. Would you like to install it?"
9099 ):
91- import subprocess
92-
93- subprocess .run (["uv" , "add" , "tavily-python" ], check = True )
100+ try :
101+ subprocess .run (["pip" , "install" , "tavily-python" ], check = True )
102+ raise ImportError (
103+ "'tavily-python' has been installed. Please restart your Python application to use the TavilyExtractorTool."
104+ )
105+ except subprocess .CalledProcessError as e :
106+ raise ImportError (
107+ f"Attempted to install 'tavily-python' but failed: { e } . "
108+ f"Please install it manually to use the TavilyExtractorTool."
109+ )
94110 else :
95111 raise ImportError (
96112 "The 'tavily-python' package is required to use the TavilyExtractorTool. "
97- "Please install it with: uv add tavily-python"
113+ "Please install it with: pip install tavily-python"
98114 )
99115
def _run(
    self,
    urls: Union[List[str], str],
) -> str:
    """
    Synchronously extracts content from the given URL(s).

    The extraction options (``extract_depth``, ``include_images`` and
    ``timeout``) are read from the tool instance instead of being passed
    per call, so the only call-time argument is ``urls``.

    Args:
        urls: The URL(s) to extract data from.

    Returns:
        A JSON string (indented by two spaces) containing the extracted data.

    Raises:
        ValueError: If the synchronous Tavily client has not been initialized.
    """
    # Explicit identity check: only a missing client is an error, regardless
    # of how the client object itself evaluates in a boolean context.
    if self.client is None:
        raise ValueError(
            "Tavily client is not initialized. Ensure 'tavily-python' is installed and API key is set."
        )

    response = self.client.extract(
        urls=urls,
        extract_depth=self.extract_depth,
        include_images=self.include_images,
        timeout=self.timeout,
    )
    return json.dumps(response, indent=2)
128143
async def _arun(
    self,
    urls: Union[List[str], str],
) -> str:
    """
    Asynchronously extracts content from the given URL(s).

    The extraction options (``extract_depth``, ``include_images`` and
    ``timeout``) are read from the tool instance instead of being passed
    per call, so the only call-time argument is ``urls``.

    Args:
        urls: The URL(s) to extract data from.

    Returns:
        A JSON string (indented by two spaces) containing the extracted data.

    Raises:
        ValueError: If the asynchronous Tavily client has not been initialized.
    """
    # Explicit identity check: only a missing client is an error, regardless
    # of how the client object itself evaluates in a boolean context.
    if self.async_client is None:
        raise ValueError(
            "Tavily async client is not initialized. Ensure 'tavily-python' is installed and API key is set."
        )

    # The call must be awaited before serialising; passing the un-awaited
    # coroutine object to json.dumps would raise a TypeError.
    results = await self.async_client.extract(
        urls=urls,
        extract_depth=self.extract_depth,
        include_images=self.include_images,
        timeout=self.timeout,
    )
    return json.dumps(results, indent=2)
0 commit comments