modelcontextprotocol · kgarg2468 · May 22, 2026
diff --git a/src/fetch/src/mcp_server_fetch/__init__.py b/src/fetch/src/mcp_server_fetch/__init__.py
@@ -16,9 +16,14 @@ def main():
         help="Ignore robots.txt restrictions",
     )
     parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
+    parser.add_argument(
+        "--no-readability",
+        action="store_true",
+        help="Disable Readability.js extraction and use the Python HTML simplifier only",
+    )
 
     args = parser.parse_args()
-    asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
+    asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url, not args.no_readability))
 
 
 if __name__ == "__main__":

diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py
@@ -24,7 +24,7 @@
 DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
 
 
-def extract_content_from_html(html: str) -> str:
+def extract_content_from_html(html: str, use_readability: bool = True) -> str:
     """Extract and convert HTML content to Markdown format.
 
     Args:
@@ -33,16 +33,38 @@ def extract_content_from_html(html: str) -> str:
     Returns:
         Simplified markdown version of the content
+
+    Notes:
+        When ``use_readability`` is true, Readability.js extraction is tried
+        first and the Python-only simplifier is used as a single fallback if
+        that attempt raises or yields no content. When it is false, only the
+        Python-only simplifier runs. If nothing can be extracted, an
+        ``<error>`` message string is returned instead of Markdown.
     """
-    ret = readabilipy.simple_json.simple_json_from_html_string(
-        html, use_readability=True
-    )
-    if not ret["content"]:
+    # Try the requested simplifier first. When that is Readability.js, allow
+    # exactly one retry with the Python-only simplifier, so a failed or empty
+    # Readability.js pass never triggers the same Python pass twice.
+    attempts = (True, False) if use_readability else (False,)
+    content = None
+    for attempt_readability in attempts:
+        try:
+            ret = readabilipy.simple_json.simple_json_from_html_string(
+                html, use_readability=attempt_readability
+            )
+        except Exception:
+            # Best effort: a simplifier failure (e.g. Node.js being unavailable
+            # for Readability.js) counts as "no content" for this attempt.
+            continue
+        content = ret.get("content") if isinstance(ret, dict) else None
+        if content:
+            break
+
+    if not content:
         return "<error>Page failed to be simplified from HTML</error>"
     content = markdownify.markdownify(
-        ret["content"],
+        content,
         heading_style=markdownify.ATX,
     )
-    return content
+    return content if content.strip() else "<error>Page failed to be simplified from HTML</error>"
 
 
 def get_robots_txt_url(url: str) -> str:
@@ -109,7 +131,11 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url:
 
 
 async def fetch_url(
-    url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
+    url: str,
+    user_agent: str,
+    force_raw: bool = False,
+    proxy_url: str | None = None,
+    use_readability: bool = True,
 ) -> Tuple[str, str]:
     """
     Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
@@ -140,7 +166,7 @@ async def fetch_url(
     )
 
     if is_page_html and not force_raw:
-        return extract_content_from_html(page_raw), ""
+        return extract_content_from_html(page_raw, use_readability=use_readability), ""
 
     return (
         page_raw,
@@ -182,13 +208,15 @@ async def serve(
     custom_user_agent: str | None = None,
     ignore_robots_txt: bool = False,
     proxy_url: str | None = None,
+    use_readability: bool = True,
 ) -> None:
     """Run the fetch MCP server.
 
     Args:
         custom_user_agent: Optional custom User-Agent string to use for requests
         ignore_robots_txt: Whether to ignore robots.txt restrictions
         proxy_url: Optional proxy URL to use for requests
+        use_readability: Whether to use Readability.js while simplifying HTML
     """
     server = Server("mcp-fetch")
     user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -235,7 +263,11 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
             await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
 
         content, prefix = await fetch_url(
-            url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
+            url,
+            user_agent_autonomous,
+            force_raw=args.raw,
+            proxy_url=proxy_url,
+            use_readability=use_readability,
         )
         original_length = len(content)
         if args.start_index >= original_length:
@@ -262,7 +294,12 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
         url = arguments["url"]
 
         try:
-            content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
+            content, prefix = await fetch_url(
+                url,
+                user_agent_manual,
+                proxy_url=proxy_url,
+                use_readability=use_readability,
+            )
             # TODO: after SDK bug is addressed, don't catch the exception
         except McpError as e:
             return GetPromptResult(

diff --git a/src/fetch/tests/test_server.py b/src/fetch/tests/test_server.py
@@ -81,6 +81,45 @@ def test_html_with_links(self):
         result = extract_content_from_html(html)
         assert "Example" in result
 
+    def test_falls_back_when_readability_is_unavailable(self):
+        """Test that a Readability.js failure falls back to the Python-only simplifier."""
+        calls = []  # use_readability flag of each simplifier call, in call order
+
+        def simplify(html, use_readability):
+            calls.append(use_readability)
+            if use_readability:
+                raise RuntimeError("node is not available")  # simulate the Readability.js pass failing
+            return {"content": "<main><p>Fallback content</p></main>"}
+
+        with patch(
+            "mcp_server_fetch.server.readabilipy.simple_json.simple_json_from_html_string",
+            side_effect=simplify,
+        ):
+            result = extract_content_from_html("<html><body>Fallback content</body></html>")
+
+        assert "Fallback content" in result
+        assert calls == [True, False]  # Readability.js first, then one Python-only retry
+
+    def test_can_disable_readability(self):
+        """Test that use_readability=False skips Readability.js entirely."""
+        calls = []  # use_readability flag of each simplifier call, in call order
+
+        def simplify(html, use_readability):
+            calls.append(use_readability)
+            return {"content": "<main><p>Python-only content</p></main>"}
+
+        with patch(
+            "mcp_server_fetch.server.readabilipy.simple_json.simple_json_from_html_string",
+            side_effect=simplify,
+        ):
+            result = extract_content_from_html(
+                "<html><body>Python-only content</body></html>",
+                use_readability=False,
+            )
+
+        assert "Python-only content" in result
+        assert calls == [False]  # exactly one call, and never with Readability.js enabled
+
     def test_empty_content_returns_error(self):
         """Test that empty/invalid HTML returns error message."""
         html = ""